Claudio Aracena
first model
1421945
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 788652,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.996830034032755e-05,
"loss": 7.8831,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9936600680655096e-05,
"loss": 7.6494,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.990490102098264e-05,
"loss": 7.6212,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.987320136131019e-05,
"loss": 7.6134,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.984150170163773e-05,
"loss": 7.5908,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.980980204196528e-05,
"loss": 7.5855,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.9778102382292826e-05,
"loss": 7.5888,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.974640272262037e-05,
"loss": 7.5772,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.971470306294792e-05,
"loss": 7.5693,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.968300340327546e-05,
"loss": 7.5633,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.965130374360301e-05,
"loss": 7.567,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.961960408393056e-05,
"loss": 7.5412,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.95879044242581e-05,
"loss": 7.5584,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.955620476458565e-05,
"loss": 7.5604,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.952450510491319e-05,
"loss": 7.568,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.949280544524074e-05,
"loss": 7.5403,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.946110578556829e-05,
"loss": 7.5384,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.9429406125895836e-05,
"loss": 7.552,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.939770646622338e-05,
"loss": 7.553,
"step": 9500
},
{
"epoch": 0.04,
"learning_rate": 4.936600680655093e-05,
"loss": 7.5243,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.933430714687847e-05,
"loss": 7.5329,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.930260748720602e-05,
"loss": 7.5257,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.9270907827533566e-05,
"loss": 7.5282,
"step": 11500
},
{
"epoch": 0.05,
"learning_rate": 4.923920816786111e-05,
"loss": 7.5224,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 4.920750850818866e-05,
"loss": 7.5279,
"step": 12500
},
{
"epoch": 0.05,
"learning_rate": 4.91758088485162e-05,
"loss": 7.5301,
"step": 13000
},
{
"epoch": 0.05,
"learning_rate": 4.914410918884375e-05,
"loss": 7.5161,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.9112409529171297e-05,
"loss": 7.5328,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 4.908070986949884e-05,
"loss": 7.5159,
"step": 14500
},
{
"epoch": 0.06,
"learning_rate": 4.904901020982639e-05,
"loss": 7.505,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.901731055015393e-05,
"loss": 7.5269,
"step": 15500
},
{
"epoch": 0.06,
"learning_rate": 4.898561089048148e-05,
"loss": 7.5221,
"step": 16000
},
{
"epoch": 0.06,
"learning_rate": 4.895391123080903e-05,
"loss": 7.5086,
"step": 16500
},
{
"epoch": 0.06,
"learning_rate": 4.892221157113657e-05,
"loss": 7.5176,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 4.889051191146412e-05,
"loss": 7.5047,
"step": 17500
},
{
"epoch": 0.07,
"learning_rate": 4.885881225179167e-05,
"loss": 7.5244,
"step": 18000
},
{
"epoch": 0.07,
"learning_rate": 4.8827112592119214e-05,
"loss": 7.5216,
"step": 18500
},
{
"epoch": 0.07,
"learning_rate": 4.879541293244676e-05,
"loss": 7.5125,
"step": 19000
},
{
"epoch": 0.07,
"learning_rate": 4.876371327277431e-05,
"loss": 7.5166,
"step": 19500
},
{
"epoch": 0.08,
"learning_rate": 4.873201361310185e-05,
"loss": 7.5145,
"step": 20000
},
{
"epoch": 0.08,
"learning_rate": 4.87003139534294e-05,
"loss": 7.5154,
"step": 20500
},
{
"epoch": 0.08,
"learning_rate": 4.8668614293756944e-05,
"loss": 7.4982,
"step": 21000
},
{
"epoch": 0.08,
"learning_rate": 4.8636914634084494e-05,
"loss": 7.5113,
"step": 21500
},
{
"epoch": 0.08,
"learning_rate": 4.860521497441204e-05,
"loss": 7.5053,
"step": 22000
},
{
"epoch": 0.09,
"learning_rate": 4.857351531473958e-05,
"loss": 7.5311,
"step": 22500
},
{
"epoch": 0.09,
"learning_rate": 4.854181565506713e-05,
"loss": 7.4977,
"step": 23000
},
{
"epoch": 0.09,
"learning_rate": 4.851011599539468e-05,
"loss": 7.5142,
"step": 23500
},
{
"epoch": 0.09,
"learning_rate": 4.8478416335722224e-05,
"loss": 7.5061,
"step": 24000
},
{
"epoch": 0.09,
"learning_rate": 4.844671667604977e-05,
"loss": 7.519,
"step": 24500
},
{
"epoch": 0.1,
"learning_rate": 4.841501701637731e-05,
"loss": 7.5055,
"step": 25000
},
{
"epoch": 0.1,
"learning_rate": 4.838331735670486e-05,
"loss": 7.5135,
"step": 25500
},
{
"epoch": 0.1,
"learning_rate": 4.835161769703241e-05,
"loss": 7.5041,
"step": 26000
},
{
"epoch": 0.1,
"learning_rate": 4.8319918037359954e-05,
"loss": 7.5036,
"step": 26500
},
{
"epoch": 0.1,
"learning_rate": 4.82882183776875e-05,
"loss": 7.526,
"step": 27000
},
{
"epoch": 0.1,
"learning_rate": 4.825651871801505e-05,
"loss": 7.5069,
"step": 27500
},
{
"epoch": 0.11,
"learning_rate": 4.822481905834259e-05,
"loss": 7.4989,
"step": 28000
},
{
"epoch": 0.11,
"learning_rate": 4.819311939867014e-05,
"loss": 7.5217,
"step": 28500
},
{
"epoch": 0.11,
"learning_rate": 4.8161419738997684e-05,
"loss": 7.4926,
"step": 29000
},
{
"epoch": 0.11,
"learning_rate": 4.812972007932523e-05,
"loss": 7.5044,
"step": 29500
},
{
"epoch": 0.11,
"learning_rate": 4.809802041965278e-05,
"loss": 7.4951,
"step": 30000
},
{
"epoch": 0.12,
"learning_rate": 4.806632075998032e-05,
"loss": 7.5096,
"step": 30500
},
{
"epoch": 0.12,
"learning_rate": 4.803462110030787e-05,
"loss": 7.5154,
"step": 31000
},
{
"epoch": 0.12,
"learning_rate": 4.800292144063542e-05,
"loss": 7.5022,
"step": 31500
},
{
"epoch": 0.12,
"learning_rate": 4.797122178096296e-05,
"loss": 7.5204,
"step": 32000
},
{
"epoch": 0.12,
"learning_rate": 4.793952212129051e-05,
"loss": 7.5097,
"step": 32500
},
{
"epoch": 0.13,
"learning_rate": 4.790782246161806e-05,
"loss": 7.5172,
"step": 33000
},
{
"epoch": 0.13,
"learning_rate": 4.78761228019456e-05,
"loss": 7.4931,
"step": 33500
},
{
"epoch": 0.13,
"learning_rate": 4.784442314227315e-05,
"loss": 7.4993,
"step": 34000
},
{
"epoch": 0.13,
"learning_rate": 4.781272348260069e-05,
"loss": 7.4984,
"step": 34500
},
{
"epoch": 0.13,
"learning_rate": 4.778102382292824e-05,
"loss": 7.5244,
"step": 35000
},
{
"epoch": 0.14,
"learning_rate": 4.774932416325579e-05,
"loss": 7.504,
"step": 35500
},
{
"epoch": 0.14,
"learning_rate": 4.771762450358333e-05,
"loss": 7.5031,
"step": 36000
},
{
"epoch": 0.14,
"learning_rate": 4.768592484391088e-05,
"loss": 7.5034,
"step": 36500
},
{
"epoch": 0.14,
"learning_rate": 4.7654225184238425e-05,
"loss": 7.5217,
"step": 37000
},
{
"epoch": 0.14,
"learning_rate": 4.762252552456597e-05,
"loss": 7.5025,
"step": 37500
},
{
"epoch": 0.14,
"learning_rate": 4.759082586489352e-05,
"loss": 7.5059,
"step": 38000
},
{
"epoch": 0.15,
"learning_rate": 4.755912620522106e-05,
"loss": 7.5178,
"step": 38500
},
{
"epoch": 0.15,
"learning_rate": 4.752742654554861e-05,
"loss": 7.5004,
"step": 39000
},
{
"epoch": 0.15,
"learning_rate": 4.7495726885876155e-05,
"loss": 7.5094,
"step": 39500
},
{
"epoch": 0.15,
"learning_rate": 4.74640272262037e-05,
"loss": 7.4932,
"step": 40000
},
{
"epoch": 0.15,
"learning_rate": 4.743232756653125e-05,
"loss": 7.5062,
"step": 40500
},
{
"epoch": 0.16,
"learning_rate": 4.74006279068588e-05,
"loss": 7.5029,
"step": 41000
},
{
"epoch": 0.16,
"learning_rate": 4.736892824718634e-05,
"loss": 7.5028,
"step": 41500
},
{
"epoch": 0.16,
"learning_rate": 4.7337228587513885e-05,
"loss": 7.5218,
"step": 42000
},
{
"epoch": 0.16,
"learning_rate": 4.730552892784143e-05,
"loss": 7.4965,
"step": 42500
},
{
"epoch": 0.16,
"learning_rate": 4.727382926816898e-05,
"loss": 7.5068,
"step": 43000
},
{
"epoch": 0.17,
"learning_rate": 4.724212960849653e-05,
"loss": 7.5086,
"step": 43500
},
{
"epoch": 0.17,
"learning_rate": 4.721042994882407e-05,
"loss": 7.4934,
"step": 44000
},
{
"epoch": 0.17,
"learning_rate": 4.7178730289151615e-05,
"loss": 7.4922,
"step": 44500
},
{
"epoch": 0.17,
"learning_rate": 4.7147030629479165e-05,
"loss": 7.4961,
"step": 45000
},
{
"epoch": 0.17,
"learning_rate": 4.711533096980671e-05,
"loss": 7.5111,
"step": 45500
},
{
"epoch": 0.17,
"learning_rate": 4.708363131013426e-05,
"loss": 7.4964,
"step": 46000
},
{
"epoch": 0.18,
"learning_rate": 4.70519316504618e-05,
"loss": 7.5062,
"step": 46500
},
{
"epoch": 0.18,
"learning_rate": 4.7020231990789345e-05,
"loss": 7.4992,
"step": 47000
},
{
"epoch": 0.18,
"learning_rate": 4.6988532331116895e-05,
"loss": 7.5116,
"step": 47500
},
{
"epoch": 0.18,
"learning_rate": 4.695683267144444e-05,
"loss": 7.5012,
"step": 48000
},
{
"epoch": 0.18,
"learning_rate": 4.692513301177199e-05,
"loss": 7.4854,
"step": 48500
},
{
"epoch": 0.19,
"learning_rate": 4.689343335209954e-05,
"loss": 7.4943,
"step": 49000
},
{
"epoch": 0.19,
"learning_rate": 4.686173369242708e-05,
"loss": 7.498,
"step": 49500
},
{
"epoch": 0.19,
"learning_rate": 4.6830034032754625e-05,
"loss": 7.4868,
"step": 50000
},
{
"epoch": 0.19,
"learning_rate": 4.6798334373082175e-05,
"loss": 7.4862,
"step": 50500
},
{
"epoch": 0.19,
"learning_rate": 4.676663471340972e-05,
"loss": 7.4992,
"step": 51000
},
{
"epoch": 0.2,
"learning_rate": 4.673493505373727e-05,
"loss": 7.4996,
"step": 51500
},
{
"epoch": 0.2,
"learning_rate": 4.670323539406481e-05,
"loss": 7.499,
"step": 52000
},
{
"epoch": 0.2,
"learning_rate": 4.6671535734392356e-05,
"loss": 7.503,
"step": 52500
},
{
"epoch": 0.2,
"learning_rate": 4.6639836074719906e-05,
"loss": 7.488,
"step": 53000
},
{
"epoch": 0.2,
"learning_rate": 4.660813641504745e-05,
"loss": 7.4991,
"step": 53500
},
{
"epoch": 0.21,
"learning_rate": 4.6576436755375e-05,
"loss": 7.4968,
"step": 54000
},
{
"epoch": 0.21,
"learning_rate": 4.654473709570254e-05,
"loss": 7.5004,
"step": 54500
},
{
"epoch": 0.21,
"learning_rate": 4.6513037436030086e-05,
"loss": 7.494,
"step": 55000
},
{
"epoch": 0.21,
"learning_rate": 4.6481337776357636e-05,
"loss": 7.4935,
"step": 55500
},
{
"epoch": 0.21,
"learning_rate": 4.644963811668518e-05,
"loss": 7.5,
"step": 56000
},
{
"epoch": 0.21,
"learning_rate": 4.641793845701273e-05,
"loss": 7.4858,
"step": 56500
},
{
"epoch": 0.22,
"learning_rate": 4.638623879734027e-05,
"loss": 7.5052,
"step": 57000
},
{
"epoch": 0.22,
"learning_rate": 4.6354539137667816e-05,
"loss": 7.5023,
"step": 57500
},
{
"epoch": 0.22,
"learning_rate": 4.6322839477995366e-05,
"loss": 7.4919,
"step": 58000
},
{
"epoch": 0.22,
"learning_rate": 4.6291139818322916e-05,
"loss": 7.49,
"step": 58500
},
{
"epoch": 0.22,
"learning_rate": 4.625944015865046e-05,
"loss": 7.5039,
"step": 59000
},
{
"epoch": 0.23,
"learning_rate": 4.622774049897801e-05,
"loss": 7.4817,
"step": 59500
},
{
"epoch": 0.23,
"learning_rate": 4.6196040839305546e-05,
"loss": 7.4938,
"step": 60000
},
{
"epoch": 0.23,
"learning_rate": 4.6164341179633096e-05,
"loss": 7.496,
"step": 60500
},
{
"epoch": 0.23,
"learning_rate": 4.6132641519960646e-05,
"loss": 7.4985,
"step": 61000
},
{
"epoch": 0.23,
"learning_rate": 4.610094186028819e-05,
"loss": 7.4996,
"step": 61500
},
{
"epoch": 0.24,
"learning_rate": 4.606924220061574e-05,
"loss": 7.5,
"step": 62000
},
{
"epoch": 0.24,
"learning_rate": 4.603754254094328e-05,
"loss": 7.4978,
"step": 62500
},
{
"epoch": 0.24,
"learning_rate": 4.6005842881270826e-05,
"loss": 7.4842,
"step": 63000
},
{
"epoch": 0.24,
"learning_rate": 4.5974143221598376e-05,
"loss": 7.5213,
"step": 63500
},
{
"epoch": 0.24,
"learning_rate": 4.5942443561925926e-05,
"loss": 7.4911,
"step": 64000
},
{
"epoch": 0.25,
"learning_rate": 4.591074390225347e-05,
"loss": 7.4901,
"step": 64500
},
{
"epoch": 0.25,
"learning_rate": 4.587904424258101e-05,
"loss": 7.5091,
"step": 65000
},
{
"epoch": 0.25,
"learning_rate": 4.5847344582908556e-05,
"loss": 7.5016,
"step": 65500
},
{
"epoch": 0.25,
"learning_rate": 4.5815644923236106e-05,
"loss": 7.4955,
"step": 66000
},
{
"epoch": 0.25,
"learning_rate": 4.5783945263563656e-05,
"loss": 7.484,
"step": 66500
},
{
"epoch": 0.25,
"learning_rate": 4.57522456038912e-05,
"loss": 7.4943,
"step": 67000
},
{
"epoch": 0.26,
"learning_rate": 4.572054594421874e-05,
"loss": 7.4962,
"step": 67500
},
{
"epoch": 0.26,
"learning_rate": 4.568884628454629e-05,
"loss": 7.4952,
"step": 68000
},
{
"epoch": 0.26,
"learning_rate": 4.5657146624873837e-05,
"loss": 7.5072,
"step": 68500
},
{
"epoch": 0.26,
"learning_rate": 4.5625446965201387e-05,
"loss": 7.5096,
"step": 69000
},
{
"epoch": 0.26,
"learning_rate": 4.559374730552893e-05,
"loss": 7.49,
"step": 69500
},
{
"epoch": 0.27,
"learning_rate": 4.556204764585647e-05,
"loss": 7.4966,
"step": 70000
},
{
"epoch": 0.27,
"learning_rate": 4.553034798618402e-05,
"loss": 7.5115,
"step": 70500
},
{
"epoch": 0.27,
"learning_rate": 4.549864832651157e-05,
"loss": 7.503,
"step": 71000
},
{
"epoch": 0.27,
"learning_rate": 4.546694866683912e-05,
"loss": 7.4929,
"step": 71500
},
{
"epoch": 0.27,
"learning_rate": 4.543524900716667e-05,
"loss": 7.4934,
"step": 72000
},
{
"epoch": 0.28,
"learning_rate": 4.54035493474942e-05,
"loss": 7.4957,
"step": 72500
},
{
"epoch": 0.28,
"learning_rate": 4.5371849687821753e-05,
"loss": 7.4916,
"step": 73000
},
{
"epoch": 0.28,
"learning_rate": 4.53401500281493e-05,
"loss": 7.5026,
"step": 73500
},
{
"epoch": 0.28,
"learning_rate": 4.530845036847685e-05,
"loss": 7.5125,
"step": 74000
},
{
"epoch": 0.28,
"learning_rate": 4.52767507088044e-05,
"loss": 7.4996,
"step": 74500
},
{
"epoch": 0.29,
"learning_rate": 4.5245051049131933e-05,
"loss": 7.4954,
"step": 75000
},
{
"epoch": 0.29,
"learning_rate": 4.5213351389459484e-05,
"loss": 7.4997,
"step": 75500
},
{
"epoch": 0.29,
"learning_rate": 4.5181651729787034e-05,
"loss": 7.5005,
"step": 76000
},
{
"epoch": 0.29,
"learning_rate": 4.514995207011458e-05,
"loss": 7.4982,
"step": 76500
},
{
"epoch": 0.29,
"learning_rate": 4.511825241044213e-05,
"loss": 7.4946,
"step": 77000
},
{
"epoch": 0.29,
"learning_rate": 4.508655275076967e-05,
"loss": 7.4983,
"step": 77500
},
{
"epoch": 0.3,
"learning_rate": 4.5054853091097214e-05,
"loss": 7.5142,
"step": 78000
},
{
"epoch": 0.3,
"learning_rate": 4.5023153431424764e-05,
"loss": 7.4921,
"step": 78500
},
{
"epoch": 0.3,
"learning_rate": 4.499145377175231e-05,
"loss": 7.4931,
"step": 79000
},
{
"epoch": 0.3,
"learning_rate": 4.495975411207986e-05,
"loss": 7.5017,
"step": 79500
},
{
"epoch": 0.3,
"learning_rate": 4.49280544524074e-05,
"loss": 7.4859,
"step": 80000
},
{
"epoch": 0.31,
"learning_rate": 4.4896354792734944e-05,
"loss": 7.4899,
"step": 80500
},
{
"epoch": 0.31,
"learning_rate": 4.4864655133062494e-05,
"loss": 7.4942,
"step": 81000
},
{
"epoch": 0.31,
"learning_rate": 4.4832955473390044e-05,
"loss": 7.4778,
"step": 81500
},
{
"epoch": 0.31,
"learning_rate": 4.480125581371759e-05,
"loss": 7.5,
"step": 82000
},
{
"epoch": 0.31,
"learning_rate": 4.476955615404513e-05,
"loss": 7.5007,
"step": 82500
},
{
"epoch": 0.32,
"learning_rate": 4.4737856494372674e-05,
"loss": 7.4928,
"step": 83000
},
{
"epoch": 0.32,
"learning_rate": 4.4706156834700224e-05,
"loss": 7.4768,
"step": 83500
},
{
"epoch": 0.32,
"learning_rate": 4.4674457175027774e-05,
"loss": 7.4966,
"step": 84000
},
{
"epoch": 0.32,
"learning_rate": 4.464275751535532e-05,
"loss": 7.5056,
"step": 84500
},
{
"epoch": 0.32,
"learning_rate": 4.461105785568286e-05,
"loss": 7.4967,
"step": 85000
},
{
"epoch": 0.33,
"learning_rate": 4.457935819601041e-05,
"loss": 7.493,
"step": 85500
},
{
"epoch": 0.33,
"learning_rate": 4.4547658536337954e-05,
"loss": 7.5024,
"step": 86000
},
{
"epoch": 0.33,
"learning_rate": 4.4515958876665504e-05,
"loss": 7.4954,
"step": 86500
},
{
"epoch": 0.33,
"learning_rate": 4.448425921699305e-05,
"loss": 7.493,
"step": 87000
},
{
"epoch": 0.33,
"learning_rate": 4.44525595573206e-05,
"loss": 7.5053,
"step": 87500
},
{
"epoch": 0.33,
"learning_rate": 4.442085989764814e-05,
"loss": 7.4864,
"step": 88000
},
{
"epoch": 0.34,
"learning_rate": 4.4389160237975684e-05,
"loss": 7.4929,
"step": 88500
},
{
"epoch": 0.34,
"learning_rate": 4.4357460578303234e-05,
"loss": 7.4995,
"step": 89000
},
{
"epoch": 0.34,
"learning_rate": 4.4325760918630785e-05,
"loss": 7.4882,
"step": 89500
},
{
"epoch": 0.34,
"learning_rate": 4.429406125895833e-05,
"loss": 7.5016,
"step": 90000
},
{
"epoch": 0.34,
"learning_rate": 4.426236159928587e-05,
"loss": 7.4996,
"step": 90500
},
{
"epoch": 0.35,
"learning_rate": 4.4230661939613414e-05,
"loss": 7.486,
"step": 91000
},
{
"epoch": 0.35,
"learning_rate": 4.4198962279940965e-05,
"loss": 7.5026,
"step": 91500
},
{
"epoch": 0.35,
"learning_rate": 4.4167262620268515e-05,
"loss": 7.4921,
"step": 92000
},
{
"epoch": 0.35,
"learning_rate": 4.413556296059606e-05,
"loss": 7.5054,
"step": 92500
},
{
"epoch": 0.35,
"learning_rate": 4.41038633009236e-05,
"loss": 7.4965,
"step": 93000
},
{
"epoch": 0.36,
"learning_rate": 4.407216364125115e-05,
"loss": 7.4943,
"step": 93500
},
{
"epoch": 0.36,
"learning_rate": 4.4040463981578695e-05,
"loss": 7.4944,
"step": 94000
},
{
"epoch": 0.36,
"learning_rate": 4.4008764321906245e-05,
"loss": 7.4919,
"step": 94500
},
{
"epoch": 0.36,
"learning_rate": 4.397706466223379e-05,
"loss": 7.4955,
"step": 95000
},
{
"epoch": 0.36,
"learning_rate": 4.394536500256133e-05,
"loss": 7.5051,
"step": 95500
},
{
"epoch": 0.37,
"learning_rate": 4.391366534288888e-05,
"loss": 7.5002,
"step": 96000
},
{
"epoch": 0.37,
"learning_rate": 4.3881965683216425e-05,
"loss": 7.5037,
"step": 96500
},
{
"epoch": 0.37,
"learning_rate": 4.3850266023543975e-05,
"loss": 7.4848,
"step": 97000
},
{
"epoch": 0.37,
"learning_rate": 4.381856636387152e-05,
"loss": 7.5064,
"step": 97500
},
{
"epoch": 0.37,
"learning_rate": 4.378686670419906e-05,
"loss": 7.4905,
"step": 98000
},
{
"epoch": 0.37,
"learning_rate": 4.375516704452661e-05,
"loss": 7.4929,
"step": 98500
},
{
"epoch": 0.38,
"learning_rate": 4.372346738485416e-05,
"loss": 7.5115,
"step": 99000
},
{
"epoch": 0.38,
"learning_rate": 4.3691767725181705e-05,
"loss": 7.4942,
"step": 99500
},
{
"epoch": 0.38,
"learning_rate": 4.3660068065509255e-05,
"loss": 7.5059,
"step": 100000
},
{
"epoch": 0.38,
"learning_rate": 4.362836840583679e-05,
"loss": 7.4867,
"step": 100500
},
{
"epoch": 0.38,
"learning_rate": 4.359666874616434e-05,
"loss": 7.4966,
"step": 101000
},
{
"epoch": 0.39,
"learning_rate": 4.356496908649189e-05,
"loss": 7.4955,
"step": 101500
},
{
"epoch": 0.39,
"learning_rate": 4.3533269426819435e-05,
"loss": 7.4877,
"step": 102000
},
{
"epoch": 0.39,
"learning_rate": 4.3501569767146985e-05,
"loss": 7.4893,
"step": 102500
},
{
"epoch": 0.39,
"learning_rate": 4.346987010747453e-05,
"loss": 7.4861,
"step": 103000
},
{
"epoch": 0.39,
"learning_rate": 4.343817044780207e-05,
"loss": 7.4841,
"step": 103500
},
{
"epoch": 0.4,
"learning_rate": 4.340647078812962e-05,
"loss": 7.49,
"step": 104000
},
{
"epoch": 0.4,
"learning_rate": 4.3374771128457165e-05,
"loss": 7.4895,
"step": 104500
},
{
"epoch": 0.4,
"learning_rate": 4.3343071468784715e-05,
"loss": 7.5027,
"step": 105000
},
{
"epoch": 0.4,
"learning_rate": 4.331137180911226e-05,
"loss": 7.493,
"step": 105500
},
{
"epoch": 0.4,
"learning_rate": 4.32796721494398e-05,
"loss": 7.4999,
"step": 106000
},
{
"epoch": 0.41,
"learning_rate": 4.324797248976735e-05,
"loss": 7.5011,
"step": 106500
},
{
"epoch": 0.41,
"learning_rate": 4.32162728300949e-05,
"loss": 7.503,
"step": 107000
},
{
"epoch": 0.41,
"learning_rate": 4.3184573170422446e-05,
"loss": 7.4797,
"step": 107500
},
{
"epoch": 0.41,
"learning_rate": 4.315287351074999e-05,
"loss": 7.4771,
"step": 108000
},
{
"epoch": 0.41,
"learning_rate": 4.312117385107753e-05,
"loss": 7.5038,
"step": 108500
},
{
"epoch": 0.41,
"learning_rate": 4.308947419140508e-05,
"loss": 7.4926,
"step": 109000
},
{
"epoch": 0.42,
"learning_rate": 4.305777453173263e-05,
"loss": 7.5078,
"step": 109500
},
{
"epoch": 0.42,
"learning_rate": 4.3026074872060176e-05,
"loss": 7.4931,
"step": 110000
},
{
"epoch": 0.42,
"learning_rate": 4.299437521238772e-05,
"loss": 7.4927,
"step": 110500
},
{
"epoch": 0.42,
"learning_rate": 4.296267555271527e-05,
"loss": 7.5058,
"step": 111000
},
{
"epoch": 0.42,
"learning_rate": 4.293097589304281e-05,
"loss": 7.4955,
"step": 111500
},
{
"epoch": 0.43,
"learning_rate": 4.289927623337036e-05,
"loss": 7.4994,
"step": 112000
},
{
"epoch": 0.43,
"learning_rate": 4.286757657369791e-05,
"loss": 7.5213,
"step": 112500
},
{
"epoch": 0.43,
"learning_rate": 4.283587691402545e-05,
"loss": 7.4715,
"step": 113000
},
{
"epoch": 0.43,
"learning_rate": 4.2804177254353e-05,
"loss": 7.5062,
"step": 113500
},
{
"epoch": 0.43,
"learning_rate": 4.277247759468054e-05,
"loss": 7.5004,
"step": 114000
},
{
"epoch": 0.44,
"learning_rate": 4.274077793500809e-05,
"loss": 7.4991,
"step": 114500
},
{
"epoch": 0.44,
"learning_rate": 4.270907827533564e-05,
"loss": 7.4989,
"step": 115000
},
{
"epoch": 0.44,
"learning_rate": 4.2677378615663186e-05,
"loss": 7.4709,
"step": 115500
},
{
"epoch": 0.44,
"learning_rate": 4.264567895599073e-05,
"loss": 7.4851,
"step": 116000
},
{
"epoch": 0.44,
"learning_rate": 4.261397929631828e-05,
"loss": 7.49,
"step": 116500
},
{
"epoch": 0.45,
"learning_rate": 4.258227963664582e-05,
"loss": 7.4925,
"step": 117000
},
{
"epoch": 0.45,
"learning_rate": 4.255057997697337e-05,
"loss": 7.4997,
"step": 117500
},
{
"epoch": 0.45,
"learning_rate": 4.2518880317300916e-05,
"loss": 7.5011,
"step": 118000
},
{
"epoch": 0.45,
"learning_rate": 4.248718065762846e-05,
"loss": 7.495,
"step": 118500
},
{
"epoch": 0.45,
"learning_rate": 4.245548099795601e-05,
"loss": 7.4749,
"step": 119000
},
{
"epoch": 0.45,
"learning_rate": 4.242378133828355e-05,
"loss": 7.5037,
"step": 119500
},
{
"epoch": 0.46,
"learning_rate": 4.23920816786111e-05,
"loss": 7.4854,
"step": 120000
},
{
"epoch": 0.46,
"learning_rate": 4.2360382018938646e-05,
"loss": 7.5,
"step": 120500
},
{
"epoch": 0.46,
"learning_rate": 4.232868235926619e-05,
"loss": 7.5072,
"step": 121000
},
{
"epoch": 0.46,
"learning_rate": 4.229698269959374e-05,
"loss": 7.4904,
"step": 121500
},
{
"epoch": 0.46,
"learning_rate": 4.226528303992128e-05,
"loss": 7.4916,
"step": 122000
},
{
"epoch": 0.47,
"learning_rate": 4.223358338024883e-05,
"loss": 7.4959,
"step": 122500
},
{
"epoch": 0.47,
"learning_rate": 4.2201883720576376e-05,
"loss": 7.5056,
"step": 123000
},
{
"epoch": 0.47,
"learning_rate": 4.217018406090392e-05,
"loss": 7.4923,
"step": 123500
},
{
"epoch": 0.47,
"learning_rate": 4.213848440123147e-05,
"loss": 7.4946,
"step": 124000
},
{
"epoch": 0.47,
"learning_rate": 4.210678474155902e-05,
"loss": 7.4985,
"step": 124500
},
{
"epoch": 0.48,
"learning_rate": 4.207508508188656e-05,
"loss": 7.4835,
"step": 125000
},
{
"epoch": 0.48,
"learning_rate": 4.2043385422214107e-05,
"loss": 7.4963,
"step": 125500
},
{
"epoch": 0.48,
"learning_rate": 4.201168576254165e-05,
"loss": 7.4945,
"step": 126000
},
{
"epoch": 0.48,
"learning_rate": 4.19799861028692e-05,
"loss": 7.4981,
"step": 126500
},
{
"epoch": 0.48,
"learning_rate": 4.194828644319675e-05,
"loss": 7.4966,
"step": 127000
},
{
"epoch": 0.49,
"learning_rate": 4.1916586783524293e-05,
"loss": 7.4856,
"step": 127500
},
{
"epoch": 0.49,
"learning_rate": 4.1884887123851843e-05,
"loss": 7.4974,
"step": 128000
},
{
"epoch": 0.49,
"learning_rate": 4.185318746417939e-05,
"loss": 7.4956,
"step": 128500
},
{
"epoch": 0.49,
"learning_rate": 4.182148780450693e-05,
"loss": 7.4918,
"step": 129000
},
{
"epoch": 0.49,
"learning_rate": 4.178978814483448e-05,
"loss": 7.5015,
"step": 129500
},
{
"epoch": 0.49,
"learning_rate": 4.175808848516203e-05,
"loss": 7.4968,
"step": 130000
},
{
"epoch": 0.5,
"learning_rate": 4.1726388825489574e-05,
"loss": 7.497,
"step": 130500
},
{
"epoch": 0.5,
"learning_rate": 4.169468916581712e-05,
"loss": 7.4941,
"step": 131000
},
{
"epoch": 0.5,
"learning_rate": 4.166298950614466e-05,
"loss": 7.5052,
"step": 131500
},
{
"epoch": 0.5,
"learning_rate": 4.163128984647221e-05,
"loss": 7.4969,
"step": 132000
},
{
"epoch": 0.5,
"learning_rate": 4.159959018679976e-05,
"loss": 7.4822,
"step": 132500
},
{
"epoch": 0.51,
"learning_rate": 4.1567890527127304e-05,
"loss": 7.4896,
"step": 133000
},
{
"epoch": 0.51,
"learning_rate": 4.153619086745485e-05,
"loss": 7.5044,
"step": 133500
},
{
"epoch": 0.51,
"learning_rate": 4.15044912077824e-05,
"loss": 7.4871,
"step": 134000
},
{
"epoch": 0.51,
"learning_rate": 4.147279154810994e-05,
"loss": 7.4949,
"step": 134500
},
{
"epoch": 0.51,
"learning_rate": 4.144109188843749e-05,
"loss": 7.4831,
"step": 135000
},
{
"epoch": 0.52,
"learning_rate": 4.1409392228765034e-05,
"loss": 7.4978,
"step": 135500
},
{
"epoch": 0.52,
"learning_rate": 4.137769256909258e-05,
"loss": 7.5074,
"step": 136000
},
{
"epoch": 0.52,
"learning_rate": 4.134599290942013e-05,
"loss": 7.4895,
"step": 136500
},
{
"epoch": 0.52,
"learning_rate": 4.131429324974767e-05,
"loss": 7.4818,
"step": 137000
},
{
"epoch": 0.52,
"learning_rate": 4.128259359007522e-05,
"loss": 7.5038,
"step": 137500
},
{
"epoch": 0.52,
"learning_rate": 4.125089393040277e-05,
"loss": 7.5023,
"step": 138000
},
{
"epoch": 0.53,
"learning_rate": 4.121919427073031e-05,
"loss": 7.4888,
"step": 138500
},
{
"epoch": 0.53,
"learning_rate": 4.118749461105786e-05,
"loss": 7.4901,
"step": 139000
},
{
"epoch": 0.53,
"learning_rate": 4.11557949513854e-05,
"loss": 7.4867,
"step": 139500
},
{
"epoch": 0.53,
"learning_rate": 4.112409529171295e-05,
"loss": 7.5052,
"step": 140000
},
{
"epoch": 0.53,
"learning_rate": 4.10923956320405e-05,
"loss": 7.4933,
"step": 140500
},
{
"epoch": 0.54,
"learning_rate": 4.106069597236804e-05,
"loss": 7.4912,
"step": 141000
},
{
"epoch": 0.54,
"learning_rate": 4.102899631269559e-05,
"loss": 7.497,
"step": 141500
},
{
"epoch": 0.54,
"learning_rate": 4.099729665302314e-05,
"loss": 7.4989,
"step": 142000
},
{
"epoch": 0.54,
"learning_rate": 4.096559699335068e-05,
"loss": 7.486,
"step": 142500
},
{
"epoch": 0.54,
"learning_rate": 4.093389733367823e-05,
"loss": 7.4852,
"step": 143000
},
{
"epoch": 0.55,
"learning_rate": 4.0902197674005774e-05,
"loss": 7.4835,
"step": 143500
},
{
"epoch": 0.55,
"learning_rate": 4.087049801433332e-05,
"loss": 7.4888,
"step": 144000
},
{
"epoch": 0.55,
"learning_rate": 4.083879835466087e-05,
"loss": 7.492,
"step": 144500
},
{
"epoch": 0.55,
"learning_rate": 4.080709869498841e-05,
"loss": 7.4884,
"step": 145000
},
{
"epoch": 0.55,
"learning_rate": 4.077539903531596e-05,
"loss": 7.4859,
"step": 145500
},
{
"epoch": 0.56,
"learning_rate": 4.0743699375643505e-05,
"loss": 7.4929,
"step": 146000
},
{
"epoch": 0.56,
"learning_rate": 4.071199971597105e-05,
"loss": 7.4726,
"step": 146500
},
{
"epoch": 0.56,
"learning_rate": 4.06803000562986e-05,
"loss": 7.4954,
"step": 147000
},
{
"epoch": 0.56,
"learning_rate": 4.064860039662615e-05,
"loss": 7.4859,
"step": 147500
},
{
"epoch": 0.56,
"learning_rate": 4.061690073695369e-05,
"loss": 7.4997,
"step": 148000
},
{
"epoch": 0.56,
"learning_rate": 4.0585201077281235e-05,
"loss": 7.4929,
"step": 148500
},
{
"epoch": 0.57,
"learning_rate": 4.055350141760878e-05,
"loss": 7.5029,
"step": 149000
},
{
"epoch": 0.57,
"learning_rate": 4.052180175793633e-05,
"loss": 7.4995,
"step": 149500
},
{
"epoch": 0.57,
"learning_rate": 4.049010209826388e-05,
"loss": 7.4838,
"step": 150000
},
{
"epoch": 0.57,
"learning_rate": 4.045840243859142e-05,
"loss": 7.4977,
"step": 150500
},
{
"epoch": 0.57,
"learning_rate": 4.0426702778918965e-05,
"loss": 7.4772,
"step": 151000
},
{
"epoch": 0.58,
"learning_rate": 4.0395003119246515e-05,
"loss": 7.4779,
"step": 151500
},
{
"epoch": 0.58,
"learning_rate": 4.036330345957406e-05,
"loss": 7.4878,
"step": 152000
},
{
"epoch": 0.58,
"learning_rate": 4.033160379990161e-05,
"loss": 7.4886,
"step": 152500
},
{
"epoch": 0.58,
"learning_rate": 4.029990414022915e-05,
"loss": 7.4924,
"step": 153000
},
{
"epoch": 0.58,
"learning_rate": 4.0268204480556695e-05,
"loss": 7.4925,
"step": 153500
},
{
"epoch": 0.59,
"learning_rate": 4.0236504820884245e-05,
"loss": 7.4942,
"step": 154000
},
{
"epoch": 0.59,
"learning_rate": 4.020480516121179e-05,
"loss": 7.4917,
"step": 154500
},
{
"epoch": 0.59,
"learning_rate": 4.017310550153934e-05,
"loss": 7.4959,
"step": 155000
},
{
"epoch": 0.59,
"learning_rate": 4.014140584186689e-05,
"loss": 7.5003,
"step": 155500
},
{
"epoch": 0.59,
"learning_rate": 4.010970618219443e-05,
"loss": 7.5046,
"step": 156000
},
{
"epoch": 0.6,
"learning_rate": 4.0078006522521975e-05,
"loss": 7.4904,
"step": 156500
},
{
"epoch": 0.6,
"learning_rate": 4.004630686284952e-05,
"loss": 7.4885,
"step": 157000
},
{
"epoch": 0.6,
"learning_rate": 4.001460720317707e-05,
"loss": 7.4943,
"step": 157500
},
{
"epoch": 0.6,
"learning_rate": 3.998290754350462e-05,
"loss": 7.4867,
"step": 158000
},
{
"epoch": 0.6,
"learning_rate": 3.995120788383216e-05,
"loss": 7.4789,
"step": 158500
},
{
"epoch": 0.6,
"learning_rate": 3.9919508224159705e-05,
"loss": 7.491,
"step": 159000
},
{
"epoch": 0.61,
"learning_rate": 3.9887808564487255e-05,
"loss": 7.4918,
"step": 159500
},
{
"epoch": 0.61,
"learning_rate": 3.98561089048148e-05,
"loss": 7.4913,
"step": 160000
},
{
"epoch": 0.61,
"learning_rate": 3.982440924514235e-05,
"loss": 7.4989,
"step": 160500
},
{
"epoch": 0.61,
"learning_rate": 3.979270958546989e-05,
"loss": 7.4798,
"step": 161000
},
{
"epoch": 0.61,
"learning_rate": 3.9761009925797435e-05,
"loss": 7.4842,
"step": 161500
},
{
"epoch": 0.62,
"learning_rate": 3.9729310266124985e-05,
"loss": 7.5008,
"step": 162000
},
{
"epoch": 0.62,
"learning_rate": 3.969761060645253e-05,
"loss": 7.4923,
"step": 162500
},
{
"epoch": 0.62,
"learning_rate": 3.966591094678008e-05,
"loss": 7.4895,
"step": 163000
},
{
"epoch": 0.62,
"learning_rate": 3.963421128710762e-05,
"loss": 7.4881,
"step": 163500
},
{
"epoch": 0.62,
"learning_rate": 3.9602511627435166e-05,
"loss": 7.4888,
"step": 164000
},
{
"epoch": 0.63,
"learning_rate": 3.9570811967762716e-05,
"loss": 7.4991,
"step": 164500
},
{
"epoch": 0.63,
"learning_rate": 3.9539112308090266e-05,
"loss": 7.4991,
"step": 165000
},
{
"epoch": 0.63,
"learning_rate": 3.950741264841781e-05,
"loss": 7.4837,
"step": 165500
},
{
"epoch": 0.63,
"learning_rate": 3.947571298874536e-05,
"loss": 7.4836,
"step": 166000
},
{
"epoch": 0.63,
"learning_rate": 3.9444013329072896e-05,
"loss": 7.5083,
"step": 166500
},
{
"epoch": 0.64,
"learning_rate": 3.9412313669400446e-05,
"loss": 7.4966,
"step": 167000
},
{
"epoch": 0.64,
"learning_rate": 3.9380614009727996e-05,
"loss": 7.494,
"step": 167500
},
{
"epoch": 0.64,
"learning_rate": 3.934891435005554e-05,
"loss": 7.4945,
"step": 168000
},
{
"epoch": 0.64,
"learning_rate": 3.931721469038309e-05,
"loss": 7.4825,
"step": 168500
},
{
"epoch": 0.64,
"learning_rate": 3.928551503071063e-05,
"loss": 7.5042,
"step": 169000
},
{
"epoch": 0.64,
"learning_rate": 3.9253815371038176e-05,
"loss": 7.4825,
"step": 169500
},
{
"epoch": 0.65,
"learning_rate": 3.9222115711365726e-05,
"loss": 7.501,
"step": 170000
},
{
"epoch": 0.65,
"learning_rate": 3.919041605169327e-05,
"loss": 7.5055,
"step": 170500
},
{
"epoch": 0.65,
"learning_rate": 3.915871639202082e-05,
"loss": 7.4882,
"step": 171000
},
{
"epoch": 0.65,
"learning_rate": 3.912701673234836e-05,
"loss": 7.4768,
"step": 171500
},
{
"epoch": 0.65,
"learning_rate": 3.9095317072675906e-05,
"loss": 7.4929,
"step": 172000
},
{
"epoch": 0.66,
"learning_rate": 3.9063617413003456e-05,
"loss": 7.4906,
"step": 172500
},
{
"epoch": 0.66,
"learning_rate": 3.9031917753331006e-05,
"loss": 7.4936,
"step": 173000
},
{
"epoch": 0.66,
"learning_rate": 3.900021809365855e-05,
"loss": 7.5112,
"step": 173500
},
{
"epoch": 0.66,
"learning_rate": 3.896851843398609e-05,
"loss": 7.4947,
"step": 174000
},
{
"epoch": 0.66,
"learning_rate": 3.8936818774313636e-05,
"loss": 7.4788,
"step": 174500
},
{
"epoch": 0.67,
"learning_rate": 3.8905119114641186e-05,
"loss": 7.4922,
"step": 175000
},
{
"epoch": 0.67,
"learning_rate": 3.8873419454968736e-05,
"loss": 7.5001,
"step": 175500
},
{
"epoch": 0.67,
"learning_rate": 3.884171979529628e-05,
"loss": 7.4882,
"step": 176000
},
{
"epoch": 0.67,
"learning_rate": 3.881002013562382e-05,
"loss": 7.5024,
"step": 176500
},
{
"epoch": 0.67,
"learning_rate": 3.877832047595137e-05,
"loss": 7.4859,
"step": 177000
},
{
"epoch": 0.68,
"learning_rate": 3.8746620816278916e-05,
"loss": 7.4837,
"step": 177500
},
{
"epoch": 0.68,
"learning_rate": 3.8714921156606466e-05,
"loss": 7.5076,
"step": 178000
},
{
"epoch": 0.68,
"learning_rate": 3.8683221496934017e-05,
"loss": 7.4998,
"step": 178500
},
{
"epoch": 0.68,
"learning_rate": 3.865152183726155e-05,
"loss": 7.491,
"step": 179000
},
{
"epoch": 0.68,
"learning_rate": 3.86198221775891e-05,
"loss": 7.4899,
"step": 179500
},
{
"epoch": 0.68,
"learning_rate": 3.8588122517916647e-05,
"loss": 7.488,
"step": 180000
},
{
"epoch": 0.69,
"learning_rate": 3.8556422858244197e-05,
"loss": 7.4895,
"step": 180500
},
{
"epoch": 0.69,
"learning_rate": 3.852472319857175e-05,
"loss": 7.4976,
"step": 181000
},
{
"epoch": 0.69,
"learning_rate": 3.849302353889928e-05,
"loss": 7.505,
"step": 181500
},
{
"epoch": 0.69,
"learning_rate": 3.846132387922683e-05,
"loss": 7.4959,
"step": 182000
},
{
"epoch": 0.69,
"learning_rate": 3.8429624219554383e-05,
"loss": 7.4872,
"step": 182500
},
{
"epoch": 0.7,
"learning_rate": 3.839792455988193e-05,
"loss": 7.5021,
"step": 183000
},
{
"epoch": 0.7,
"learning_rate": 3.836622490020948e-05,
"loss": 7.4847,
"step": 183500
},
{
"epoch": 0.7,
"learning_rate": 3.833452524053702e-05,
"loss": 7.4968,
"step": 184000
},
{
"epoch": 0.7,
"learning_rate": 3.8302825580864563e-05,
"loss": 7.4772,
"step": 184500
},
{
"epoch": 0.7,
"learning_rate": 3.8271125921192114e-05,
"loss": 7.479,
"step": 185000
},
{
"epoch": 0.71,
"learning_rate": 3.823942626151966e-05,
"loss": 7.4952,
"step": 185500
},
{
"epoch": 0.71,
"learning_rate": 3.820772660184721e-05,
"loss": 7.4871,
"step": 186000
},
{
"epoch": 0.71,
"learning_rate": 3.817602694217475e-05,
"loss": 7.4915,
"step": 186500
},
{
"epoch": 0.71,
"learning_rate": 3.8144327282502294e-05,
"loss": 7.486,
"step": 187000
},
{
"epoch": 0.71,
"learning_rate": 3.8112627622829844e-05,
"loss": 7.4672,
"step": 187500
},
{
"epoch": 0.72,
"learning_rate": 3.808092796315739e-05,
"loss": 7.4786,
"step": 188000
},
{
"epoch": 0.72,
"learning_rate": 3.804922830348494e-05,
"loss": 7.4983,
"step": 188500
},
{
"epoch": 0.72,
"learning_rate": 3.801752864381248e-05,
"loss": 7.5075,
"step": 189000
},
{
"epoch": 0.72,
"learning_rate": 3.7985828984140024e-05,
"loss": 7.4893,
"step": 189500
},
{
"epoch": 0.72,
"learning_rate": 3.7954129324467574e-05,
"loss": 7.5036,
"step": 190000
},
{
"epoch": 0.72,
"learning_rate": 3.7922429664795124e-05,
"loss": 7.4761,
"step": 190500
},
{
"epoch": 0.73,
"learning_rate": 3.789073000512267e-05,
"loss": 7.4805,
"step": 191000
},
{
"epoch": 0.73,
"learning_rate": 3.785903034545021e-05,
"loss": 7.4845,
"step": 191500
},
{
"epoch": 0.73,
"learning_rate": 3.782733068577776e-05,
"loss": 7.4894,
"step": 192000
},
{
"epoch": 0.73,
"learning_rate": 3.7795631026105304e-05,
"loss": 7.4943,
"step": 192500
},
{
"epoch": 0.73,
"learning_rate": 3.7763931366432854e-05,
"loss": 7.4942,
"step": 193000
},
{
"epoch": 0.74,
"learning_rate": 3.77322317067604e-05,
"loss": 7.4964,
"step": 193500
},
{
"epoch": 0.74,
"learning_rate": 3.770053204708795e-05,
"loss": 7.4966,
"step": 194000
},
{
"epoch": 0.74,
"learning_rate": 3.766883238741549e-05,
"loss": 7.5028,
"step": 194500
},
{
"epoch": 0.74,
"learning_rate": 3.7637132727743034e-05,
"loss": 7.4922,
"step": 195000
},
{
"epoch": 0.74,
"learning_rate": 3.7605433068070584e-05,
"loss": 7.5029,
"step": 195500
},
{
"epoch": 0.75,
"learning_rate": 3.7573733408398134e-05,
"loss": 7.48,
"step": 196000
},
{
"epoch": 0.75,
"learning_rate": 3.754203374872568e-05,
"loss": 7.4906,
"step": 196500
},
{
"epoch": 0.75,
"learning_rate": 3.751033408905322e-05,
"loss": 7.4937,
"step": 197000
},
{
"epoch": 0.75,
"learning_rate": 3.7478634429380764e-05,
"loss": 7.4733,
"step": 197500
},
{
"epoch": 0.75,
"learning_rate": 3.7446934769708314e-05,
"loss": 7.5046,
"step": 198000
},
{
"epoch": 0.76,
"learning_rate": 3.7415235110035864e-05,
"loss": 7.4886,
"step": 198500
},
{
"epoch": 0.76,
"learning_rate": 3.738353545036341e-05,
"loss": 7.4883,
"step": 199000
},
{
"epoch": 0.76,
"learning_rate": 3.735183579069095e-05,
"loss": 7.4905,
"step": 199500
},
{
"epoch": 0.76,
"learning_rate": 3.73201361310185e-05,
"loss": 7.4878,
"step": 200000
},
{
"epoch": 0.76,
"learning_rate": 3.7288436471346044e-05,
"loss": 7.4861,
"step": 200500
},
{
"epoch": 0.76,
"learning_rate": 3.7256736811673595e-05,
"loss": 7.4833,
"step": 201000
},
{
"epoch": 0.77,
"learning_rate": 3.722503715200114e-05,
"loss": 7.4814,
"step": 201500
},
{
"epoch": 0.77,
"learning_rate": 3.719333749232868e-05,
"loss": 7.4885,
"step": 202000
},
{
"epoch": 0.77,
"learning_rate": 3.716163783265623e-05,
"loss": 7.4981,
"step": 202500
},
{
"epoch": 0.77,
"learning_rate": 3.7129938172983775e-05,
"loss": 7.5081,
"step": 203000
},
{
"epoch": 0.77,
"learning_rate": 3.7098238513311325e-05,
"loss": 7.5021,
"step": 203500
},
{
"epoch": 0.78,
"learning_rate": 3.706653885363887e-05,
"loss": 7.4927,
"step": 204000
},
{
"epoch": 0.78,
"learning_rate": 3.703483919396641e-05,
"loss": 7.4876,
"step": 204500
},
{
"epoch": 0.78,
"learning_rate": 3.700313953429396e-05,
"loss": 7.4899,
"step": 205000
},
{
"epoch": 0.78,
"learning_rate": 3.6971439874621505e-05,
"loss": 7.4842,
"step": 205500
},
{
"epoch": 0.78,
"learning_rate": 3.6939740214949055e-05,
"loss": 7.4956,
"step": 206000
},
{
"epoch": 0.79,
"learning_rate": 3.6908040555276605e-05,
"loss": 7.4852,
"step": 206500
},
{
"epoch": 0.79,
"learning_rate": 3.687634089560414e-05,
"loss": 7.5173,
"step": 207000
},
{
"epoch": 0.79,
"learning_rate": 3.684464123593169e-05,
"loss": 7.4874,
"step": 207500
},
{
"epoch": 0.79,
"learning_rate": 3.681294157625924e-05,
"loss": 7.4758,
"step": 208000
},
{
"epoch": 0.79,
"learning_rate": 3.6781241916586785e-05,
"loss": 7.4979,
"step": 208500
},
{
"epoch": 0.8,
"learning_rate": 3.6749542256914335e-05,
"loss": 7.4813,
"step": 209000
},
{
"epoch": 0.8,
"learning_rate": 3.671784259724188e-05,
"loss": 7.4912,
"step": 209500
},
{
"epoch": 0.8,
"learning_rate": 3.668614293756942e-05,
"loss": 7.496,
"step": 210000
},
{
"epoch": 0.8,
"learning_rate": 3.665444327789697e-05,
"loss": 7.5007,
"step": 210500
},
{
"epoch": 0.8,
"learning_rate": 3.6622743618224515e-05,
"loss": 7.4935,
"step": 211000
},
{
"epoch": 0.8,
"learning_rate": 3.6591043958552065e-05,
"loss": 7.4968,
"step": 211500
},
{
"epoch": 0.81,
"learning_rate": 3.655934429887961e-05,
"loss": 7.4897,
"step": 212000
},
{
"epoch": 0.81,
"learning_rate": 3.652764463920715e-05,
"loss": 7.4955,
"step": 212500
},
{
"epoch": 0.81,
"learning_rate": 3.64959449795347e-05,
"loss": 7.5055,
"step": 213000
},
{
"epoch": 0.81,
"learning_rate": 3.646424531986225e-05,
"loss": 7.4854,
"step": 213500
},
{
"epoch": 0.81,
"learning_rate": 3.6432545660189795e-05,
"loss": 7.5171,
"step": 214000
},
{
"epoch": 0.82,
"learning_rate": 3.640084600051734e-05,
"loss": 7.4996,
"step": 214500
},
{
"epoch": 0.82,
"learning_rate": 3.636914634084488e-05,
"loss": 7.4906,
"step": 215000
},
{
"epoch": 0.82,
"learning_rate": 3.633744668117243e-05,
"loss": 7.4752,
"step": 215500
},
{
"epoch": 0.82,
"learning_rate": 3.630574702149998e-05,
"loss": 7.4973,
"step": 216000
},
{
"epoch": 0.82,
"learning_rate": 3.6274047361827525e-05,
"loss": 7.4903,
"step": 216500
},
{
"epoch": 0.83,
"learning_rate": 3.624234770215507e-05,
"loss": 7.4801,
"step": 217000
},
{
"epoch": 0.83,
"learning_rate": 3.621064804248262e-05,
"loss": 7.4868,
"step": 217500
},
{
"epoch": 0.83,
"learning_rate": 3.617894838281016e-05,
"loss": 7.4925,
"step": 218000
},
{
"epoch": 0.83,
"learning_rate": 3.614724872313771e-05,
"loss": 7.4862,
"step": 218500
},
{
"epoch": 0.83,
"learning_rate": 3.6115549063465256e-05,
"loss": 7.4982,
"step": 219000
},
{
"epoch": 0.83,
"learning_rate": 3.60838494037928e-05,
"loss": 7.4778,
"step": 219500
},
{
"epoch": 0.84,
"learning_rate": 3.605214974412035e-05,
"loss": 7.5073,
"step": 220000
},
{
"epoch": 0.84,
"learning_rate": 3.602045008444789e-05,
"loss": 7.4798,
"step": 220500
},
{
"epoch": 0.84,
"learning_rate": 3.598875042477544e-05,
"loss": 7.4791,
"step": 221000
},
{
"epoch": 0.84,
"learning_rate": 3.595705076510299e-05,
"loss": 7.4905,
"step": 221500
},
{
"epoch": 0.84,
"learning_rate": 3.5925351105430536e-05,
"loss": 7.4877,
"step": 222000
},
{
"epoch": 0.85,
"learning_rate": 3.589365144575808e-05,
"loss": 7.4945,
"step": 222500
},
{
"epoch": 0.85,
"learning_rate": 3.586195178608562e-05,
"loss": 7.4819,
"step": 223000
},
{
"epoch": 0.85,
"learning_rate": 3.583025212641317e-05,
"loss": 7.4984,
"step": 223500
},
{
"epoch": 0.85,
"learning_rate": 3.579855246674072e-05,
"loss": 7.5007,
"step": 224000
},
{
"epoch": 0.85,
"learning_rate": 3.5766852807068266e-05,
"loss": 7.4948,
"step": 224500
},
{
"epoch": 0.86,
"learning_rate": 3.573515314739581e-05,
"loss": 7.5001,
"step": 225000
},
{
"epoch": 0.86,
"learning_rate": 3.570345348772336e-05,
"loss": 7.4981,
"step": 225500
},
{
"epoch": 0.86,
"learning_rate": 3.56717538280509e-05,
"loss": 7.4831,
"step": 226000
},
{
"epoch": 0.86,
"learning_rate": 3.564005416837845e-05,
"loss": 7.4941,
"step": 226500
},
{
"epoch": 0.86,
"learning_rate": 3.5608354508705996e-05,
"loss": 7.4782,
"step": 227000
},
{
"epoch": 0.87,
"learning_rate": 3.557665484903354e-05,
"loss": 7.4813,
"step": 227500
},
{
"epoch": 0.87,
"learning_rate": 3.554495518936109e-05,
"loss": 7.4642,
"step": 228000
},
{
"epoch": 0.87,
"learning_rate": 3.551325552968863e-05,
"loss": 7.4816,
"step": 228500
},
{
"epoch": 0.87,
"learning_rate": 3.548155587001618e-05,
"loss": 7.509,
"step": 229000
},
{
"epoch": 0.87,
"learning_rate": 3.5449856210343726e-05,
"loss": 7.4822,
"step": 229500
},
{
"epoch": 0.87,
"learning_rate": 3.541815655067127e-05,
"loss": 7.4954,
"step": 230000
},
{
"epoch": 0.88,
"learning_rate": 3.538645689099882e-05,
"loss": 7.4901,
"step": 230500
},
{
"epoch": 0.88,
"learning_rate": 3.535475723132637e-05,
"loss": 7.496,
"step": 231000
},
{
"epoch": 0.88,
"learning_rate": 3.532305757165391e-05,
"loss": 7.4993,
"step": 231500
},
{
"epoch": 0.88,
"learning_rate": 3.5291357911981456e-05,
"loss": 7.4869,
"step": 232000
},
{
"epoch": 0.88,
"learning_rate": 3.5259658252309e-05,
"loss": 7.4902,
"step": 232500
},
{
"epoch": 0.89,
"learning_rate": 3.522795859263655e-05,
"loss": 7.5146,
"step": 233000
},
{
"epoch": 0.89,
"learning_rate": 3.51962589329641e-05,
"loss": 7.4939,
"step": 233500
},
{
"epoch": 0.89,
"learning_rate": 3.516455927329164e-05,
"loss": 7.4795,
"step": 234000
},
{
"epoch": 0.89,
"learning_rate": 3.513285961361919e-05,
"loss": 7.4806,
"step": 234500
},
{
"epoch": 0.89,
"learning_rate": 3.5101159953946737e-05,
"loss": 7.4974,
"step": 235000
},
{
"epoch": 0.9,
"learning_rate": 3.506946029427428e-05,
"loss": 7.4941,
"step": 235500
},
{
"epoch": 0.9,
"learning_rate": 3.503776063460183e-05,
"loss": 7.4865,
"step": 236000
},
{
"epoch": 0.9,
"learning_rate": 3.500606097492937e-05,
"loss": 7.4884,
"step": 236500
},
{
"epoch": 0.9,
"learning_rate": 3.497436131525692e-05,
"loss": 7.4856,
"step": 237000
},
{
"epoch": 0.9,
"learning_rate": 3.494266165558447e-05,
"loss": 7.4888,
"step": 237500
},
{
"epoch": 0.91,
"learning_rate": 3.491096199591201e-05,
"loss": 7.4799,
"step": 238000
},
{
"epoch": 0.91,
"learning_rate": 3.487926233623956e-05,
"loss": 7.503,
"step": 238500
},
{
"epoch": 0.91,
"learning_rate": 3.484756267656711e-05,
"loss": 7.4893,
"step": 239000
},
{
"epoch": 0.91,
"learning_rate": 3.4815863016894653e-05,
"loss": 7.4764,
"step": 239500
},
{
"epoch": 0.91,
"learning_rate": 3.47841633572222e-05,
"loss": 7.4831,
"step": 240000
},
{
"epoch": 0.91,
"learning_rate": 3.475246369754975e-05,
"loss": 7.4935,
"step": 240500
},
{
"epoch": 0.92,
"learning_rate": 3.472076403787729e-05,
"loss": 7.4765,
"step": 241000
},
{
"epoch": 0.92,
"learning_rate": 3.468906437820484e-05,
"loss": 7.4874,
"step": 241500
},
{
"epoch": 0.92,
"learning_rate": 3.4657364718532384e-05,
"loss": 7.4747,
"step": 242000
},
{
"epoch": 0.92,
"learning_rate": 3.462566505885993e-05,
"loss": 7.4763,
"step": 242500
},
{
"epoch": 0.92,
"learning_rate": 3.459396539918748e-05,
"loss": 7.4834,
"step": 243000
},
{
"epoch": 0.93,
"learning_rate": 3.456226573951502e-05,
"loss": 7.4977,
"step": 243500
},
{
"epoch": 0.93,
"learning_rate": 3.453056607984257e-05,
"loss": 7.4831,
"step": 244000
},
{
"epoch": 0.93,
"learning_rate": 3.449886642017012e-05,
"loss": 7.4984,
"step": 244500
},
{
"epoch": 0.93,
"learning_rate": 3.446716676049766e-05,
"loss": 7.4969,
"step": 245000
},
{
"epoch": 0.93,
"learning_rate": 3.443546710082521e-05,
"loss": 7.4881,
"step": 245500
},
{
"epoch": 0.94,
"learning_rate": 3.440376744115275e-05,
"loss": 7.4912,
"step": 246000
},
{
"epoch": 0.94,
"learning_rate": 3.43720677814803e-05,
"loss": 7.4867,
"step": 246500
},
{
"epoch": 0.94,
"learning_rate": 3.434036812180785e-05,
"loss": 7.4966,
"step": 247000
},
{
"epoch": 0.94,
"learning_rate": 3.430866846213539e-05,
"loss": 7.498,
"step": 247500
},
{
"epoch": 0.94,
"learning_rate": 3.427696880246294e-05,
"loss": 7.4854,
"step": 248000
},
{
"epoch": 0.95,
"learning_rate": 3.424526914279049e-05,
"loss": 7.5006,
"step": 248500
},
{
"epoch": 0.95,
"learning_rate": 3.421356948311803e-05,
"loss": 7.4864,
"step": 249000
},
{
"epoch": 0.95,
"learning_rate": 3.418186982344558e-05,
"loss": 7.4976,
"step": 249500
},
{
"epoch": 0.95,
"learning_rate": 3.4150170163773124e-05,
"loss": 7.5078,
"step": 250000
},
{
"epoch": 0.95,
"learning_rate": 3.411847050410067e-05,
"loss": 7.4829,
"step": 250500
},
{
"epoch": 0.95,
"learning_rate": 3.408677084442822e-05,
"loss": 7.477,
"step": 251000
},
{
"epoch": 0.96,
"learning_rate": 3.405507118475576e-05,
"loss": 7.4765,
"step": 251500
},
{
"epoch": 0.96,
"learning_rate": 3.402337152508331e-05,
"loss": 7.4881,
"step": 252000
},
{
"epoch": 0.96,
"learning_rate": 3.3991671865410854e-05,
"loss": 7.481,
"step": 252500
},
{
"epoch": 0.96,
"learning_rate": 3.39599722057384e-05,
"loss": 7.4841,
"step": 253000
},
{
"epoch": 0.96,
"learning_rate": 3.392827254606595e-05,
"loss": 7.4838,
"step": 253500
},
{
"epoch": 0.97,
"learning_rate": 3.389657288639349e-05,
"loss": 7.4762,
"step": 254000
},
{
"epoch": 0.97,
"learning_rate": 3.386487322672104e-05,
"loss": 7.4998,
"step": 254500
},
{
"epoch": 0.97,
"learning_rate": 3.3833173567048584e-05,
"loss": 7.4876,
"step": 255000
},
{
"epoch": 0.97,
"learning_rate": 3.380147390737613e-05,
"loss": 7.4914,
"step": 255500
},
{
"epoch": 0.97,
"learning_rate": 3.376977424770368e-05,
"loss": 7.505,
"step": 256000
},
{
"epoch": 0.98,
"learning_rate": 3.373807458803123e-05,
"loss": 7.493,
"step": 256500
},
{
"epoch": 0.98,
"learning_rate": 3.370637492835877e-05,
"loss": 7.4727,
"step": 257000
},
{
"epoch": 0.98,
"learning_rate": 3.3674675268686315e-05,
"loss": 7.4961,
"step": 257500
},
{
"epoch": 0.98,
"learning_rate": 3.3642975609013865e-05,
"loss": 7.4797,
"step": 258000
},
{
"epoch": 0.98,
"learning_rate": 3.361127594934141e-05,
"loss": 7.4963,
"step": 258500
},
{
"epoch": 0.99,
"learning_rate": 3.357957628966896e-05,
"loss": 7.4847,
"step": 259000
},
{
"epoch": 0.99,
"learning_rate": 3.35478766299965e-05,
"loss": 7.4999,
"step": 259500
},
{
"epoch": 0.99,
"learning_rate": 3.3516176970324045e-05,
"loss": 7.4945,
"step": 260000
},
{
"epoch": 0.99,
"learning_rate": 3.3484477310651595e-05,
"loss": 7.4948,
"step": 260500
},
{
"epoch": 0.99,
"learning_rate": 3.345277765097914e-05,
"loss": 7.482,
"step": 261000
},
{
"epoch": 0.99,
"learning_rate": 3.342107799130669e-05,
"loss": 7.49,
"step": 261500
},
{
"epoch": 1.0,
"learning_rate": 3.338937833163424e-05,
"loss": 7.4875,
"step": 262000
},
{
"epoch": 1.0,
"learning_rate": 3.335767867196178e-05,
"loss": 7.4904,
"step": 262500
},
{
"epoch": 1.0,
"learning_rate": 3.3325979012289325e-05,
"loss": 7.489,
"step": 263000
},
{
"epoch": 1.0,
"learning_rate": 3.329427935261687e-05,
"loss": 7.4787,
"step": 263500
},
{
"epoch": 1.0,
"learning_rate": 3.326257969294442e-05,
"loss": 7.5075,
"step": 264000
},
{
"epoch": 1.01,
"learning_rate": 3.323088003327197e-05,
"loss": 7.4919,
"step": 264500
},
{
"epoch": 1.01,
"learning_rate": 3.319918037359951e-05,
"loss": 7.4871,
"step": 265000
},
{
"epoch": 1.01,
"learning_rate": 3.3167480713927055e-05,
"loss": 7.488,
"step": 265500
},
{
"epoch": 1.01,
"learning_rate": 3.3135781054254605e-05,
"loss": 7.505,
"step": 266000
},
{
"epoch": 1.01,
"learning_rate": 3.310408139458215e-05,
"loss": 7.5001,
"step": 266500
},
{
"epoch": 1.02,
"learning_rate": 3.30723817349097e-05,
"loss": 7.4784,
"step": 267000
},
{
"epoch": 1.02,
"learning_rate": 3.304068207523724e-05,
"loss": 7.4899,
"step": 267500
},
{
"epoch": 1.02,
"learning_rate": 3.3008982415564785e-05,
"loss": 7.4622,
"step": 268000
},
{
"epoch": 1.02,
"learning_rate": 3.2977282755892335e-05,
"loss": 7.4945,
"step": 268500
},
{
"epoch": 1.02,
"learning_rate": 3.294558309621988e-05,
"loss": 7.4948,
"step": 269000
},
{
"epoch": 1.03,
"learning_rate": 3.291388343654743e-05,
"loss": 7.4886,
"step": 269500
},
{
"epoch": 1.03,
"learning_rate": 3.288218377687497e-05,
"loss": 7.4796,
"step": 270000
},
{
"epoch": 1.03,
"learning_rate": 3.2850484117202515e-05,
"loss": 7.4996,
"step": 270500
},
{
"epoch": 1.03,
"learning_rate": 3.2818784457530065e-05,
"loss": 7.4927,
"step": 271000
},
{
"epoch": 1.03,
"learning_rate": 3.278708479785761e-05,
"loss": 7.4866,
"step": 271500
},
{
"epoch": 1.03,
"learning_rate": 3.275538513818516e-05,
"loss": 7.5126,
"step": 272000
},
{
"epoch": 1.04,
"learning_rate": 3.272368547851271e-05,
"loss": 7.4901,
"step": 272500
},
{
"epoch": 1.04,
"learning_rate": 3.2691985818840245e-05,
"loss": 7.4772,
"step": 273000
},
{
"epoch": 1.04,
"learning_rate": 3.2660286159167795e-05,
"loss": 7.4876,
"step": 273500
},
{
"epoch": 1.04,
"learning_rate": 3.2628586499495346e-05,
"loss": 7.4819,
"step": 274000
},
{
"epoch": 1.04,
"learning_rate": 3.259688683982289e-05,
"loss": 7.4863,
"step": 274500
},
{
"epoch": 1.05,
"learning_rate": 3.256518718015044e-05,
"loss": 7.4794,
"step": 275000
},
{
"epoch": 1.05,
"learning_rate": 3.253348752047798e-05,
"loss": 7.4968,
"step": 275500
},
{
"epoch": 1.05,
"learning_rate": 3.2501787860805526e-05,
"loss": 7.4921,
"step": 276000
},
{
"epoch": 1.05,
"learning_rate": 3.2470088201133076e-05,
"loss": 7.4844,
"step": 276500
},
{
"epoch": 1.05,
"learning_rate": 3.243838854146062e-05,
"loss": 7.5079,
"step": 277000
},
{
"epoch": 1.06,
"learning_rate": 3.240668888178817e-05,
"loss": 7.4975,
"step": 277500
},
{
"epoch": 1.06,
"learning_rate": 3.237498922211571e-05,
"loss": 7.4969,
"step": 278000
},
{
"epoch": 1.06,
"learning_rate": 3.2343289562443256e-05,
"loss": 7.5057,
"step": 278500
},
{
"epoch": 1.06,
"learning_rate": 3.2311589902770806e-05,
"loss": 7.4997,
"step": 279000
},
{
"epoch": 1.06,
"learning_rate": 3.2279890243098356e-05,
"loss": 7.4847,
"step": 279500
},
{
"epoch": 1.07,
"learning_rate": 3.22481905834259e-05,
"loss": 7.4911,
"step": 280000
},
{
"epoch": 1.07,
"learning_rate": 3.221649092375344e-05,
"loss": 7.5051,
"step": 280500
},
{
"epoch": 1.07,
"learning_rate": 3.2184791264080986e-05,
"loss": 7.4742,
"step": 281000
},
{
"epoch": 1.07,
"learning_rate": 3.2153091604408536e-05,
"loss": 7.4969,
"step": 281500
},
{
"epoch": 1.07,
"learning_rate": 3.2121391944736086e-05,
"loss": 7.4884,
"step": 282000
},
{
"epoch": 1.07,
"learning_rate": 3.208969228506363e-05,
"loss": 7.4978,
"step": 282500
},
{
"epoch": 1.08,
"learning_rate": 3.205799262539117e-05,
"loss": 7.4849,
"step": 283000
},
{
"epoch": 1.08,
"learning_rate": 3.202629296571872e-05,
"loss": 7.4939,
"step": 283500
},
{
"epoch": 1.08,
"learning_rate": 3.1994593306046266e-05,
"loss": 7.4986,
"step": 284000
},
{
"epoch": 1.08,
"learning_rate": 3.1962893646373816e-05,
"loss": 7.4876,
"step": 284500
},
{
"epoch": 1.08,
"learning_rate": 3.193119398670136e-05,
"loss": 7.4895,
"step": 285000
},
{
"epoch": 1.09,
"learning_rate": 3.18994943270289e-05,
"loss": 7.49,
"step": 285500
},
{
"epoch": 1.09,
"learning_rate": 3.186779466735645e-05,
"loss": 7.4858,
"step": 286000
},
{
"epoch": 1.09,
"learning_rate": 3.1836095007683996e-05,
"loss": 7.492,
"step": 286500
},
{
"epoch": 1.09,
"learning_rate": 3.1804395348011546e-05,
"loss": 7.4854,
"step": 287000
},
{
"epoch": 1.09,
"learning_rate": 3.1772695688339096e-05,
"loss": 7.4993,
"step": 287500
},
{
"epoch": 1.1,
"learning_rate": 3.174099602866663e-05,
"loss": 7.4795,
"step": 288000
},
{
"epoch": 1.1,
"learning_rate": 3.170929636899418e-05,
"loss": 7.4866,
"step": 288500
},
{
"epoch": 1.1,
"learning_rate": 3.167759670932173e-05,
"loss": 7.4897,
"step": 289000
},
{
"epoch": 1.1,
"learning_rate": 3.1645897049649276e-05,
"loss": 7.4926,
"step": 289500
},
{
"epoch": 1.1,
"learning_rate": 3.1614197389976827e-05,
"loss": 7.4848,
"step": 290000
},
{
"epoch": 1.11,
"learning_rate": 3.158249773030437e-05,
"loss": 7.4971,
"step": 290500
},
{
"epoch": 1.11,
"learning_rate": 3.155079807063191e-05,
"loss": 7.4849,
"step": 291000
},
{
"epoch": 1.11,
"learning_rate": 3.151909841095946e-05,
"loss": 7.4962,
"step": 291500
},
{
"epoch": 1.11,
"learning_rate": 3.1487398751287007e-05,
"loss": 7.4818,
"step": 292000
},
{
"epoch": 1.11,
"learning_rate": 3.145569909161456e-05,
"loss": 7.4792,
"step": 292500
},
{
"epoch": 1.11,
"learning_rate": 3.14239994319421e-05,
"loss": 7.4821,
"step": 293000
},
{
"epoch": 1.12,
"learning_rate": 3.139229977226964e-05,
"loss": 7.4866,
"step": 293500
},
{
"epoch": 1.12,
"learning_rate": 3.1360600112597193e-05,
"loss": 7.4831,
"step": 294000
},
{
"epoch": 1.12,
"learning_rate": 3.132890045292474e-05,
"loss": 7.481,
"step": 294500
},
{
"epoch": 1.12,
"learning_rate": 3.129720079325229e-05,
"loss": 7.495,
"step": 295000
},
{
"epoch": 1.12,
"learning_rate": 3.126550113357983e-05,
"loss": 7.4764,
"step": 295500
},
{
"epoch": 1.13,
"learning_rate": 3.1233801473907373e-05,
"loss": 7.5023,
"step": 296000
},
{
"epoch": 1.13,
"learning_rate": 3.1202101814234924e-05,
"loss": 7.5015,
"step": 296500
},
{
"epoch": 1.13,
"learning_rate": 3.1170402154562474e-05,
"loss": 7.4767,
"step": 297000
},
{
"epoch": 1.13,
"learning_rate": 3.113870249489002e-05,
"loss": 7.4763,
"step": 297500
},
{
"epoch": 1.13,
"learning_rate": 3.110700283521756e-05,
"loss": 7.4957,
"step": 298000
},
{
"epoch": 1.14,
"learning_rate": 3.1075303175545104e-05,
"loss": 7.5016,
"step": 298500
},
{
"epoch": 1.14,
"learning_rate": 3.1043603515872654e-05,
"loss": 7.4787,
"step": 299000
},
{
"epoch": 1.14,
"learning_rate": 3.1011903856200204e-05,
"loss": 7.484,
"step": 299500
},
{
"epoch": 1.14,
"learning_rate": 3.098020419652775e-05,
"loss": 7.4945,
"step": 300000
},
{
"epoch": 1.14,
"learning_rate": 3.09485045368553e-05,
"loss": 7.5016,
"step": 300500
},
{
"epoch": 1.14,
"learning_rate": 3.091680487718284e-05,
"loss": 7.4848,
"step": 301000
},
{
"epoch": 1.15,
"learning_rate": 3.0885105217510384e-05,
"loss": 7.4909,
"step": 301500
},
{
"epoch": 1.15,
"learning_rate": 3.0853405557837934e-05,
"loss": 7.4784,
"step": 302000
},
{
"epoch": 1.15,
"learning_rate": 3.082170589816548e-05,
"loss": 7.4951,
"step": 302500
},
{
"epoch": 1.15,
"learning_rate": 3.079000623849303e-05,
"loss": 7.4843,
"step": 303000
},
{
"epoch": 1.15,
"learning_rate": 3.075830657882057e-05,
"loss": 7.5043,
"step": 303500
},
{
"epoch": 1.16,
"learning_rate": 3.0726606919148114e-05,
"loss": 7.4835,
"step": 304000
},
{
"epoch": 1.16,
"learning_rate": 3.0694907259475664e-05,
"loss": 7.4967,
"step": 304500
},
{
"epoch": 1.16,
"learning_rate": 3.0663207599803214e-05,
"loss": 7.4946,
"step": 305000
},
{
"epoch": 1.16,
"learning_rate": 3.063150794013076e-05,
"loss": 7.4888,
"step": 305500
},
{
"epoch": 1.16,
"learning_rate": 3.05998082804583e-05,
"loss": 7.4794,
"step": 306000
},
{
"epoch": 1.17,
"learning_rate": 3.056810862078585e-05,
"loss": 7.4754,
"step": 306500
},
{
"epoch": 1.17,
"learning_rate": 3.0536408961113394e-05,
"loss": 7.494,
"step": 307000
},
{
"epoch": 1.17,
"learning_rate": 3.050470930144094e-05,
"loss": 7.4847,
"step": 307500
},
{
"epoch": 1.17,
"learning_rate": 3.0473009641768484e-05,
"loss": 7.4921,
"step": 308000
},
{
"epoch": 1.17,
"learning_rate": 3.0441309982096034e-05,
"loss": 7.4799,
"step": 308500
},
{
"epoch": 1.18,
"learning_rate": 3.040961032242358e-05,
"loss": 7.5017,
"step": 309000
},
{
"epoch": 1.18,
"learning_rate": 3.0377910662751124e-05,
"loss": 7.4774,
"step": 309500
},
{
"epoch": 1.18,
"learning_rate": 3.0346211003078674e-05,
"loss": 7.4762,
"step": 310000
},
{
"epoch": 1.18,
"learning_rate": 3.031451134340622e-05,
"loss": 7.4793,
"step": 310500
},
{
"epoch": 1.18,
"learning_rate": 3.0282811683733764e-05,
"loss": 7.5039,
"step": 311000
},
{
"epoch": 1.18,
"learning_rate": 3.025111202406131e-05,
"loss": 7.4839,
"step": 311500
},
{
"epoch": 1.19,
"learning_rate": 3.0219412364388854e-05,
"loss": 7.484,
"step": 312000
},
{
"epoch": 1.19,
"learning_rate": 3.0187712704716405e-05,
"loss": 7.4838,
"step": 312500
},
{
"epoch": 1.19,
"learning_rate": 3.015601304504395e-05,
"loss": 7.4876,
"step": 313000
},
{
"epoch": 1.19,
"learning_rate": 3.0124313385371495e-05,
"loss": 7.4747,
"step": 313500
},
{
"epoch": 1.19,
"learning_rate": 3.009261372569904e-05,
"loss": 7.4817,
"step": 314000
},
{
"epoch": 1.2,
"learning_rate": 3.006091406602659e-05,
"loss": 7.4912,
"step": 314500
},
{
"epoch": 1.2,
"learning_rate": 3.0029214406354135e-05,
"loss": 7.4828,
"step": 315000
},
{
"epoch": 1.2,
"learning_rate": 2.999751474668168e-05,
"loss": 7.4845,
"step": 315500
},
{
"epoch": 1.2,
"learning_rate": 2.9965815087009225e-05,
"loss": 7.4827,
"step": 316000
},
{
"epoch": 1.2,
"learning_rate": 2.993411542733677e-05,
"loss": 7.4897,
"step": 316500
},
{
"epoch": 1.21,
"learning_rate": 2.990241576766432e-05,
"loss": 7.4898,
"step": 317000
},
{
"epoch": 1.21,
"learning_rate": 2.9870716107991865e-05,
"loss": 7.4848,
"step": 317500
},
{
"epoch": 1.21,
"learning_rate": 2.983901644831941e-05,
"loss": 7.4849,
"step": 318000
},
{
"epoch": 1.21,
"learning_rate": 2.980731678864696e-05,
"loss": 7.484,
"step": 318500
},
{
"epoch": 1.21,
"learning_rate": 2.9775617128974505e-05,
"loss": 7.4866,
"step": 319000
},
{
"epoch": 1.22,
"learning_rate": 2.974391746930205e-05,
"loss": 7.4855,
"step": 319500
},
{
"epoch": 1.22,
"learning_rate": 2.97122178096296e-05,
"loss": 7.483,
"step": 320000
},
{
"epoch": 1.22,
"learning_rate": 2.968051814995714e-05,
"loss": 7.4946,
"step": 320500
},
{
"epoch": 1.22,
"learning_rate": 2.9648818490284692e-05,
"loss": 7.4954,
"step": 321000
},
{
"epoch": 1.22,
"learning_rate": 2.9617118830612235e-05,
"loss": 7.4927,
"step": 321500
},
{
"epoch": 1.22,
"learning_rate": 2.9585419170939782e-05,
"loss": 7.4893,
"step": 322000
},
{
"epoch": 1.23,
"learning_rate": 2.9553719511267332e-05,
"loss": 7.4781,
"step": 322500
},
{
"epoch": 1.23,
"learning_rate": 2.9522019851594872e-05,
"loss": 7.4867,
"step": 323000
},
{
"epoch": 1.23,
"learning_rate": 2.9490320191922422e-05,
"loss": 7.489,
"step": 323500
},
{
"epoch": 1.23,
"learning_rate": 2.945862053224997e-05,
"loss": 7.4847,
"step": 324000
},
{
"epoch": 1.23,
"learning_rate": 2.9426920872577512e-05,
"loss": 7.4949,
"step": 324500
},
{
"epoch": 1.24,
"learning_rate": 2.9395221212905062e-05,
"loss": 7.4837,
"step": 325000
},
{
"epoch": 1.24,
"learning_rate": 2.9363521553232602e-05,
"loss": 7.4915,
"step": 325500
},
{
"epoch": 1.24,
"learning_rate": 2.9331821893560152e-05,
"loss": 7.5058,
"step": 326000
},
{
"epoch": 1.24,
"learning_rate": 2.93001222338877e-05,
"loss": 7.4839,
"step": 326500
},
{
"epoch": 1.24,
"learning_rate": 2.9268422574215242e-05,
"loss": 7.5023,
"step": 327000
},
{
"epoch": 1.25,
"learning_rate": 2.9236722914542792e-05,
"loss": 7.4928,
"step": 327500
},
{
"epoch": 1.25,
"learning_rate": 2.920502325487034e-05,
"loss": 7.4934,
"step": 328000
},
{
"epoch": 1.25,
"learning_rate": 2.9173323595197882e-05,
"loss": 7.4933,
"step": 328500
},
{
"epoch": 1.25,
"learning_rate": 2.9141623935525432e-05,
"loss": 7.4961,
"step": 329000
},
{
"epoch": 1.25,
"learning_rate": 2.9109924275852972e-05,
"loss": 7.4945,
"step": 329500
},
{
"epoch": 1.26,
"learning_rate": 2.9078224616180522e-05,
"loss": 7.4747,
"step": 330000
},
{
"epoch": 1.26,
"learning_rate": 2.904652495650807e-05,
"loss": 7.4819,
"step": 330500
},
{
"epoch": 1.26,
"learning_rate": 2.9014825296835612e-05,
"loss": 7.4906,
"step": 331000
},
{
"epoch": 1.26,
"learning_rate": 2.8983125637163162e-05,
"loss": 7.4994,
"step": 331500
},
{
"epoch": 1.26,
"learning_rate": 2.895142597749071e-05,
"loss": 7.4856,
"step": 332000
},
{
"epoch": 1.26,
"learning_rate": 2.8919726317818252e-05,
"loss": 7.4877,
"step": 332500
},
{
"epoch": 1.27,
"learning_rate": 2.88880266581458e-05,
"loss": 7.487,
"step": 333000
},
{
"epoch": 1.27,
"learning_rate": 2.8856326998473342e-05,
"loss": 7.4821,
"step": 333500
},
{
"epoch": 1.27,
"learning_rate": 2.8824627338800892e-05,
"loss": 7.4815,
"step": 334000
},
{
"epoch": 1.27,
"learning_rate": 2.879292767912844e-05,
"loss": 7.4797,
"step": 334500
},
{
"epoch": 1.27,
"learning_rate": 2.8761228019455983e-05,
"loss": 7.4878,
"step": 335000
},
{
"epoch": 1.28,
"learning_rate": 2.872952835978353e-05,
"loss": 7.4823,
"step": 335500
},
{
"epoch": 1.28,
"learning_rate": 2.869782870011108e-05,
"loss": 7.4816,
"step": 336000
},
{
"epoch": 1.28,
"learning_rate": 2.8666129040438623e-05,
"loss": 7.4992,
"step": 336500
},
{
"epoch": 1.28,
"learning_rate": 2.863442938076617e-05,
"loss": 7.4795,
"step": 337000
},
{
"epoch": 1.28,
"learning_rate": 2.860272972109372e-05,
"loss": 7.4896,
"step": 337500
},
{
"epoch": 1.29,
"learning_rate": 2.8571030061421263e-05,
"loss": 7.4846,
"step": 338000
},
{
"epoch": 1.29,
"learning_rate": 2.853933040174881e-05,
"loss": 7.4808,
"step": 338500
},
{
"epoch": 1.29,
"learning_rate": 2.8507630742076353e-05,
"loss": 7.4826,
"step": 339000
},
{
"epoch": 1.29,
"learning_rate": 2.84759310824039e-05,
"loss": 7.4996,
"step": 339500
},
{
"epoch": 1.29,
"learning_rate": 2.844423142273145e-05,
"loss": 7.492,
"step": 340000
},
{
"epoch": 1.3,
"learning_rate": 2.8412531763058993e-05,
"loss": 7.4846,
"step": 340500
},
{
"epoch": 1.3,
"learning_rate": 2.838083210338654e-05,
"loss": 7.4898,
"step": 341000
},
{
"epoch": 1.3,
"learning_rate": 2.834913244371409e-05,
"loss": 7.5043,
"step": 341500
},
{
"epoch": 1.3,
"learning_rate": 2.831743278404163e-05,
"loss": 7.4931,
"step": 342000
},
{
"epoch": 1.3,
"learning_rate": 2.828573312436918e-05,
"loss": 7.4835,
"step": 342500
},
{
"epoch": 1.3,
"learning_rate": 2.8254033464696723e-05,
"loss": 7.4702,
"step": 343000
},
{
"epoch": 1.31,
"learning_rate": 2.822233380502427e-05,
"loss": 7.4893,
"step": 343500
},
{
"epoch": 1.31,
"learning_rate": 2.819063414535182e-05,
"loss": 7.4891,
"step": 344000
},
{
"epoch": 1.31,
"learning_rate": 2.815893448567936e-05,
"loss": 7.4892,
"step": 344500
},
{
"epoch": 1.31,
"learning_rate": 2.812723482600691e-05,
"loss": 7.493,
"step": 345000
},
{
"epoch": 1.31,
"learning_rate": 2.8095535166334457e-05,
"loss": 7.4759,
"step": 345500
},
{
"epoch": 1.32,
"learning_rate": 2.8063835506662e-05,
"loss": 7.4783,
"step": 346000
},
{
"epoch": 1.32,
"learning_rate": 2.803213584698955e-05,
"loss": 7.4879,
"step": 346500
},
{
"epoch": 1.32,
"learning_rate": 2.8000436187317093e-05,
"loss": 7.4805,
"step": 347000
},
{
"epoch": 1.32,
"learning_rate": 2.796873652764464e-05,
"loss": 7.4743,
"step": 347500
},
{
"epoch": 1.32,
"learning_rate": 2.7937036867972187e-05,
"loss": 7.4829,
"step": 348000
},
{
"epoch": 1.33,
"learning_rate": 2.790533720829973e-05,
"loss": 7.4825,
"step": 348500
},
{
"epoch": 1.33,
"learning_rate": 2.787363754862728e-05,
"loss": 7.4946,
"step": 349000
},
{
"epoch": 1.33,
"learning_rate": 2.7841937888954827e-05,
"loss": 7.4916,
"step": 349500
},
{
"epoch": 1.33,
"learning_rate": 2.781023822928237e-05,
"loss": 7.4735,
"step": 350000
},
{
"epoch": 1.33,
"learning_rate": 2.777853856960992e-05,
"loss": 7.4845,
"step": 350500
},
{
"epoch": 1.34,
"learning_rate": 2.774683890993746e-05,
"loss": 7.4936,
"step": 351000
},
{
"epoch": 1.34,
"learning_rate": 2.771513925026501e-05,
"loss": 7.4954,
"step": 351500
},
{
"epoch": 1.34,
"learning_rate": 2.7683439590592557e-05,
"loss": 7.4748,
"step": 352000
},
{
"epoch": 1.34,
"learning_rate": 2.76517399309201e-05,
"loss": 7.5012,
"step": 352500
},
{
"epoch": 1.34,
"learning_rate": 2.762004027124765e-05,
"loss": 7.4788,
"step": 353000
},
{
"epoch": 1.34,
"learning_rate": 2.7588340611575197e-05,
"loss": 7.487,
"step": 353500
},
{
"epoch": 1.35,
"learning_rate": 2.755664095190274e-05,
"loss": 7.4891,
"step": 354000
},
{
"epoch": 1.35,
"learning_rate": 2.7524941292230287e-05,
"loss": 7.4856,
"step": 354500
},
{
"epoch": 1.35,
"learning_rate": 2.7493241632557837e-05,
"loss": 7.4819,
"step": 355000
},
{
"epoch": 1.35,
"learning_rate": 2.746154197288538e-05,
"loss": 7.4803,
"step": 355500
},
{
"epoch": 1.35,
"learning_rate": 2.7429842313212927e-05,
"loss": 7.4736,
"step": 356000
},
{
"epoch": 1.36,
"learning_rate": 2.739814265354047e-05,
"loss": 7.4798,
"step": 356500
},
{
"epoch": 1.36,
"learning_rate": 2.736644299386802e-05,
"loss": 7.5073,
"step": 357000
},
{
"epoch": 1.36,
"learning_rate": 2.7334743334195567e-05,
"loss": 7.4815,
"step": 357500
},
{
"epoch": 1.36,
"learning_rate": 2.730304367452311e-05,
"loss": 7.4827,
"step": 358000
},
{
"epoch": 1.36,
"learning_rate": 2.7271344014850657e-05,
"loss": 7.4744,
"step": 358500
},
{
"epoch": 1.37,
"learning_rate": 2.7239644355178207e-05,
"loss": 7.4977,
"step": 359000
},
{
"epoch": 1.37,
"learning_rate": 2.720794469550575e-05,
"loss": 7.4811,
"step": 359500
},
{
"epoch": 1.37,
"learning_rate": 2.7176245035833297e-05,
"loss": 7.4886,
"step": 360000
},
{
"epoch": 1.37,
"learning_rate": 2.714454537616084e-05,
"loss": 7.4825,
"step": 360500
},
{
"epoch": 1.37,
"learning_rate": 2.7112845716488387e-05,
"loss": 7.4851,
"step": 361000
},
{
"epoch": 1.38,
"learning_rate": 2.7081146056815938e-05,
"loss": 7.5017,
"step": 361500
},
{
"epoch": 1.38,
"learning_rate": 2.704944639714348e-05,
"loss": 7.4827,
"step": 362000
},
{
"epoch": 1.38,
"learning_rate": 2.7017746737471028e-05,
"loss": 7.4915,
"step": 362500
},
{
"epoch": 1.38,
"learning_rate": 2.6986047077798578e-05,
"loss": 7.501,
"step": 363000
},
{
"epoch": 1.38,
"learning_rate": 2.6954347418126118e-05,
"loss": 7.4834,
"step": 363500
},
{
"epoch": 1.38,
"learning_rate": 2.6922647758453668e-05,
"loss": 7.484,
"step": 364000
},
{
"epoch": 1.39,
"learning_rate": 2.689094809878121e-05,
"loss": 7.4934,
"step": 364500
},
{
"epoch": 1.39,
"learning_rate": 2.6859248439108758e-05,
"loss": 7.4851,
"step": 365000
},
{
"epoch": 1.39,
"learning_rate": 2.6827548779436308e-05,
"loss": 7.4833,
"step": 365500
},
{
"epoch": 1.39,
"learning_rate": 2.679584911976385e-05,
"loss": 7.4964,
"step": 366000
},
{
"epoch": 1.39,
"learning_rate": 2.6764149460091398e-05,
"loss": 7.4807,
"step": 366500
},
{
"epoch": 1.4,
"learning_rate": 2.6732449800418944e-05,
"loss": 7.4819,
"step": 367000
},
{
"epoch": 1.4,
"learning_rate": 2.6700750140746488e-05,
"loss": 7.4797,
"step": 367500
},
{
"epoch": 1.4,
"learning_rate": 2.6669050481074038e-05,
"loss": 7.4831,
"step": 368000
},
{
"epoch": 1.4,
"learning_rate": 2.6637350821401585e-05,
"loss": 7.4914,
"step": 368500
},
{
"epoch": 1.4,
"learning_rate": 2.6605651161729128e-05,
"loss": 7.4967,
"step": 369000
},
{
"epoch": 1.41,
"learning_rate": 2.6573951502056678e-05,
"loss": 7.4916,
"step": 369500
},
{
"epoch": 1.41,
"learning_rate": 2.6542251842384218e-05,
"loss": 7.4784,
"step": 370000
},
{
"epoch": 1.41,
"learning_rate": 2.6510552182711768e-05,
"loss": 7.507,
"step": 370500
},
{
"epoch": 1.41,
"learning_rate": 2.6478852523039315e-05,
"loss": 7.4964,
"step": 371000
},
{
"epoch": 1.41,
"learning_rate": 2.6447152863366858e-05,
"loss": 7.4925,
"step": 371500
},
{
"epoch": 1.42,
"learning_rate": 2.6415453203694408e-05,
"loss": 7.4782,
"step": 372000
},
{
"epoch": 1.42,
"learning_rate": 2.6383753544021955e-05,
"loss": 7.4683,
"step": 372500
},
{
"epoch": 1.42,
"learning_rate": 2.6352053884349498e-05,
"loss": 7.4785,
"step": 373000
},
{
"epoch": 1.42,
"learning_rate": 2.6320354224677045e-05,
"loss": 7.4876,
"step": 373500
},
{
"epoch": 1.42,
"learning_rate": 2.6288654565004588e-05,
"loss": 7.4889,
"step": 374000
},
{
"epoch": 1.42,
"learning_rate": 2.6256954905332138e-05,
"loss": 7.4773,
"step": 374500
},
{
"epoch": 1.43,
"learning_rate": 2.6225255245659685e-05,
"loss": 7.4971,
"step": 375000
},
{
"epoch": 1.43,
"learning_rate": 2.6193555585987228e-05,
"loss": 7.4915,
"step": 375500
},
{
"epoch": 1.43,
"learning_rate": 2.6161855926314775e-05,
"loss": 7.5012,
"step": 376000
},
{
"epoch": 1.43,
"learning_rate": 2.6130156266642325e-05,
"loss": 7.488,
"step": 376500
},
{
"epoch": 1.43,
"learning_rate": 2.609845660696987e-05,
"loss": 7.5013,
"step": 377000
},
{
"epoch": 1.44,
"learning_rate": 2.6066756947297415e-05,
"loss": 7.4978,
"step": 377500
},
{
"epoch": 1.44,
"learning_rate": 2.603505728762496e-05,
"loss": 7.489,
"step": 378000
},
{
"epoch": 1.44,
"learning_rate": 2.600335762795251e-05,
"loss": 7.4857,
"step": 378500
},
{
"epoch": 1.44,
"learning_rate": 2.5971657968280055e-05,
"loss": 7.4906,
"step": 379000
},
{
"epoch": 1.44,
"learning_rate": 2.59399583086076e-05,
"loss": 7.486,
"step": 379500
},
{
"epoch": 1.45,
"learning_rate": 2.5908258648935145e-05,
"loss": 7.4792,
"step": 380000
},
{
"epoch": 1.45,
"learning_rate": 2.5876558989262695e-05,
"loss": 7.4805,
"step": 380500
},
{
"epoch": 1.45,
"learning_rate": 2.584485932959024e-05,
"loss": 7.4858,
"step": 381000
},
{
"epoch": 1.45,
"learning_rate": 2.5813159669917785e-05,
"loss": 7.5006,
"step": 381500
},
{
"epoch": 1.45,
"learning_rate": 2.578146001024533e-05,
"loss": 7.4816,
"step": 382000
},
{
"epoch": 1.46,
"learning_rate": 2.5749760350572875e-05,
"loss": 7.4938,
"step": 382500
},
{
"epoch": 1.46,
"learning_rate": 2.5718060690900425e-05,
"loss": 7.4812,
"step": 383000
},
{
"epoch": 1.46,
"learning_rate": 2.568636103122797e-05,
"loss": 7.4793,
"step": 383500
},
{
"epoch": 1.46,
"learning_rate": 2.5654661371555515e-05,
"loss": 7.4912,
"step": 384000
},
{
"epoch": 1.46,
"learning_rate": 2.5622961711883066e-05,
"loss": 7.4782,
"step": 384500
},
{
"epoch": 1.46,
"learning_rate": 2.559126205221061e-05,
"loss": 7.4864,
"step": 385000
},
{
"epoch": 1.47,
"learning_rate": 2.5559562392538156e-05,
"loss": 7.4904,
"step": 385500
},
{
"epoch": 1.47,
"learning_rate": 2.5527862732865702e-05,
"loss": 7.4791,
"step": 386000
},
{
"epoch": 1.47,
"learning_rate": 2.5496163073193246e-05,
"loss": 7.4856,
"step": 386500
},
{
"epoch": 1.47,
"learning_rate": 2.5464463413520796e-05,
"loss": 7.4887,
"step": 387000
},
{
"epoch": 1.47,
"learning_rate": 2.543276375384834e-05,
"loss": 7.4914,
"step": 387500
},
{
"epoch": 1.48,
"learning_rate": 2.5401064094175886e-05,
"loss": 7.4958,
"step": 388000
},
{
"epoch": 1.48,
"learning_rate": 2.5369364434503436e-05,
"loss": 7.4776,
"step": 388500
},
{
"epoch": 1.48,
"learning_rate": 2.5337664774830976e-05,
"loss": 7.4802,
"step": 389000
},
{
"epoch": 1.48,
"learning_rate": 2.5305965115158526e-05,
"loss": 7.4843,
"step": 389500
},
{
"epoch": 1.48,
"learning_rate": 2.5274265455486073e-05,
"loss": 7.4739,
"step": 390000
},
{
"epoch": 1.49,
"learning_rate": 2.5242565795813616e-05,
"loss": 7.4993,
"step": 390500
},
{
"epoch": 1.49,
"learning_rate": 2.5210866136141166e-05,
"loss": 7.4867,
"step": 391000
},
{
"epoch": 1.49,
"learning_rate": 2.5179166476468706e-05,
"loss": 7.5017,
"step": 391500
},
{
"epoch": 1.49,
"learning_rate": 2.5147466816796256e-05,
"loss": 7.4876,
"step": 392000
},
{
"epoch": 1.49,
"learning_rate": 2.5115767157123803e-05,
"loss": 7.5033,
"step": 392500
},
{
"epoch": 1.49,
"learning_rate": 2.5084067497451346e-05,
"loss": 7.4735,
"step": 393000
},
{
"epoch": 1.5,
"learning_rate": 2.5052367837778896e-05,
"loss": 7.4946,
"step": 393500
},
{
"epoch": 1.5,
"learning_rate": 2.5020668178106443e-05,
"loss": 7.4833,
"step": 394000
},
{
"epoch": 1.5,
"learning_rate": 2.4988968518433986e-05,
"loss": 7.4954,
"step": 394500
},
{
"epoch": 1.5,
"learning_rate": 2.4957268858761533e-05,
"loss": 7.4927,
"step": 395000
},
{
"epoch": 1.5,
"learning_rate": 2.492556919908908e-05,
"loss": 7.4963,
"step": 395500
},
{
"epoch": 1.51,
"learning_rate": 2.4893869539416626e-05,
"loss": 7.4996,
"step": 396000
},
{
"epoch": 1.51,
"learning_rate": 2.486216987974417e-05,
"loss": 7.4848,
"step": 396500
},
{
"epoch": 1.51,
"learning_rate": 2.483047022007172e-05,
"loss": 7.4834,
"step": 397000
},
{
"epoch": 1.51,
"learning_rate": 2.4798770560399266e-05,
"loss": 7.4839,
"step": 397500
},
{
"epoch": 1.51,
"learning_rate": 2.476707090072681e-05,
"loss": 7.5004,
"step": 398000
},
{
"epoch": 1.52,
"learning_rate": 2.4735371241054356e-05,
"loss": 7.4833,
"step": 398500
},
{
"epoch": 1.52,
"learning_rate": 2.4703671581381903e-05,
"loss": 7.4885,
"step": 399000
},
{
"epoch": 1.52,
"learning_rate": 2.467197192170945e-05,
"loss": 7.4587,
"step": 399500
},
{
"epoch": 1.52,
"learning_rate": 2.4640272262036996e-05,
"loss": 7.4901,
"step": 400000
},
{
"epoch": 1.52,
"learning_rate": 2.4608572602364543e-05,
"loss": 7.487,
"step": 400500
},
{
"epoch": 1.53,
"learning_rate": 2.457687294269209e-05,
"loss": 7.5021,
"step": 401000
},
{
"epoch": 1.53,
"learning_rate": 2.4545173283019633e-05,
"loss": 7.4917,
"step": 401500
},
{
"epoch": 1.53,
"learning_rate": 2.451347362334718e-05,
"loss": 7.4905,
"step": 402000
},
{
"epoch": 1.53,
"learning_rate": 2.448177396367473e-05,
"loss": 7.4921,
"step": 402500
},
{
"epoch": 1.53,
"learning_rate": 2.4450074304002273e-05,
"loss": 7.4727,
"step": 403000
},
{
"epoch": 1.53,
"learning_rate": 2.441837464432982e-05,
"loss": 7.4932,
"step": 403500
},
{
"epoch": 1.54,
"learning_rate": 2.4386674984657363e-05,
"loss": 7.4857,
"step": 404000
},
{
"epoch": 1.54,
"learning_rate": 2.4354975324984913e-05,
"loss": 7.4678,
"step": 404500
},
{
"epoch": 1.54,
"learning_rate": 2.432327566531246e-05,
"loss": 7.5037,
"step": 405000
},
{
"epoch": 1.54,
"learning_rate": 2.4291576005640003e-05,
"loss": 7.4715,
"step": 405500
},
{
"epoch": 1.54,
"learning_rate": 2.425987634596755e-05,
"loss": 7.4757,
"step": 406000
},
{
"epoch": 1.55,
"learning_rate": 2.4228176686295097e-05,
"loss": 7.4853,
"step": 406500
},
{
"epoch": 1.55,
"learning_rate": 2.4196477026622644e-05,
"loss": 7.4942,
"step": 407000
},
{
"epoch": 1.55,
"learning_rate": 2.416477736695019e-05,
"loss": 7.4944,
"step": 407500
},
{
"epoch": 1.55,
"learning_rate": 2.4133077707277734e-05,
"loss": 7.4825,
"step": 408000
},
{
"epoch": 1.55,
"learning_rate": 2.4101378047605284e-05,
"loss": 7.4988,
"step": 408500
},
{
"epoch": 1.56,
"learning_rate": 2.4069678387932827e-05,
"loss": 7.4786,
"step": 409000
},
{
"epoch": 1.56,
"learning_rate": 2.4037978728260374e-05,
"loss": 7.4794,
"step": 409500
},
{
"epoch": 1.56,
"learning_rate": 2.400627906858792e-05,
"loss": 7.4878,
"step": 410000
},
{
"epoch": 1.56,
"learning_rate": 2.3974579408915467e-05,
"loss": 7.4908,
"step": 410500
},
{
"epoch": 1.56,
"learning_rate": 2.3942879749243014e-05,
"loss": 7.4775,
"step": 411000
},
{
"epoch": 1.57,
"learning_rate": 2.391118008957056e-05,
"loss": 7.4931,
"step": 411500
},
{
"epoch": 1.57,
"learning_rate": 2.3879480429898104e-05,
"loss": 7.479,
"step": 412000
},
{
"epoch": 1.57,
"learning_rate": 2.3847780770225654e-05,
"loss": 7.4955,
"step": 412500
},
{
"epoch": 1.57,
"learning_rate": 2.3816081110553197e-05,
"loss": 7.4721,
"step": 413000
},
{
"epoch": 1.57,
"learning_rate": 2.3784381450880744e-05,
"loss": 7.4964,
"step": 413500
},
{
"epoch": 1.57,
"learning_rate": 2.375268179120829e-05,
"loss": 7.4841,
"step": 414000
},
{
"epoch": 1.58,
"learning_rate": 2.3720982131535837e-05,
"loss": 7.4801,
"step": 414500
},
{
"epoch": 1.58,
"learning_rate": 2.3689282471863384e-05,
"loss": 7.4837,
"step": 415000
},
{
"epoch": 1.58,
"learning_rate": 2.3657582812190927e-05,
"loss": 7.4862,
"step": 415500
},
{
"epoch": 1.58,
"learning_rate": 2.3625883152518477e-05,
"loss": 7.4814,
"step": 416000
},
{
"epoch": 1.58,
"learning_rate": 2.3594183492846024e-05,
"loss": 7.4863,
"step": 416500
},
{
"epoch": 1.59,
"learning_rate": 2.3562483833173567e-05,
"loss": 7.4836,
"step": 417000
},
{
"epoch": 1.59,
"learning_rate": 2.3530784173501114e-05,
"loss": 7.4956,
"step": 417500
},
{
"epoch": 1.59,
"learning_rate": 2.349908451382866e-05,
"loss": 7.4953,
"step": 418000
},
{
"epoch": 1.59,
"learning_rate": 2.3467384854156208e-05,
"loss": 7.4966,
"step": 418500
},
{
"epoch": 1.59,
"learning_rate": 2.3435685194483754e-05,
"loss": 7.4903,
"step": 419000
},
{
"epoch": 1.6,
"learning_rate": 2.3403985534811298e-05,
"loss": 7.491,
"step": 419500
},
{
"epoch": 1.6,
"learning_rate": 2.3372285875138848e-05,
"loss": 7.4829,
"step": 420000
},
{
"epoch": 1.6,
"learning_rate": 2.334058621546639e-05,
"loss": 7.4785,
"step": 420500
},
{
"epoch": 1.6,
"learning_rate": 2.3308886555793938e-05,
"loss": 7.4789,
"step": 421000
},
{
"epoch": 1.6,
"learning_rate": 2.3277186896121484e-05,
"loss": 7.4857,
"step": 421500
},
{
"epoch": 1.61,
"learning_rate": 2.324548723644903e-05,
"loss": 7.4795,
"step": 422000
},
{
"epoch": 1.61,
"learning_rate": 2.3213787576776578e-05,
"loss": 7.4787,
"step": 422500
},
{
"epoch": 1.61,
"learning_rate": 2.318208791710412e-05,
"loss": 7.4901,
"step": 423000
},
{
"epoch": 1.61,
"learning_rate": 2.3150388257431668e-05,
"loss": 7.4851,
"step": 423500
},
{
"epoch": 1.61,
"learning_rate": 2.3118688597759218e-05,
"loss": 7.4885,
"step": 424000
},
{
"epoch": 1.61,
"learning_rate": 2.308698893808676e-05,
"loss": 7.4778,
"step": 424500
},
{
"epoch": 1.62,
"learning_rate": 2.3055289278414308e-05,
"loss": 7.4719,
"step": 425000
},
{
"epoch": 1.62,
"learning_rate": 2.3023589618741855e-05,
"loss": 7.4956,
"step": 425500
},
{
"epoch": 1.62,
"learning_rate": 2.29918899590694e-05,
"loss": 7.4809,
"step": 426000
},
{
"epoch": 1.62,
"learning_rate": 2.2960190299396948e-05,
"loss": 7.4857,
"step": 426500
},
{
"epoch": 1.62,
"learning_rate": 2.292849063972449e-05,
"loss": 7.4774,
"step": 427000
},
{
"epoch": 1.63,
"learning_rate": 2.2896790980052038e-05,
"loss": 7.479,
"step": 427500
},
{
"epoch": 1.63,
"learning_rate": 2.2865091320379585e-05,
"loss": 7.4812,
"step": 428000
},
{
"epoch": 1.63,
"learning_rate": 2.283339166070713e-05,
"loss": 7.4783,
"step": 428500
},
{
"epoch": 1.63,
"learning_rate": 2.2801692001034678e-05,
"loss": 7.4832,
"step": 429000
},
{
"epoch": 1.63,
"learning_rate": 2.276999234136222e-05,
"loss": 7.4875,
"step": 429500
},
{
"epoch": 1.64,
"learning_rate": 2.273829268168977e-05,
"loss": 7.4899,
"step": 430000
},
{
"epoch": 1.64,
"learning_rate": 2.270659302201732e-05,
"loss": 7.4836,
"step": 430500
},
{
"epoch": 1.64,
"learning_rate": 2.267489336234486e-05,
"loss": 7.4894,
"step": 431000
},
{
"epoch": 1.64,
"learning_rate": 2.2643193702672412e-05,
"loss": 7.4748,
"step": 431500
},
{
"epoch": 1.64,
"learning_rate": 2.2611494042999955e-05,
"loss": 7.4979,
"step": 432000
},
{
"epoch": 1.65,
"learning_rate": 2.2579794383327502e-05,
"loss": 7.509,
"step": 432500
},
{
"epoch": 1.65,
"learning_rate": 2.254809472365505e-05,
"loss": 7.4842,
"step": 433000
},
{
"epoch": 1.65,
"learning_rate": 2.2516395063982595e-05,
"loss": 7.4776,
"step": 433500
},
{
"epoch": 1.65,
"learning_rate": 2.2484695404310142e-05,
"loss": 7.488,
"step": 434000
},
{
"epoch": 1.65,
"learning_rate": 2.2452995744637685e-05,
"loss": 7.5027,
"step": 434500
},
{
"epoch": 1.65,
"learning_rate": 2.2421296084965232e-05,
"loss": 7.4735,
"step": 435000
},
{
"epoch": 1.66,
"learning_rate": 2.2389596425292782e-05,
"loss": 7.4812,
"step": 435500
},
{
"epoch": 1.66,
"learning_rate": 2.2357896765620325e-05,
"loss": 7.5071,
"step": 436000
},
{
"epoch": 1.66,
"learning_rate": 2.2326197105947872e-05,
"loss": 7.4867,
"step": 436500
},
{
"epoch": 1.66,
"learning_rate": 2.2294497446275415e-05,
"loss": 7.4881,
"step": 437000
},
{
"epoch": 1.66,
"learning_rate": 2.2262797786602965e-05,
"loss": 7.4917,
"step": 437500
},
{
"epoch": 1.67,
"learning_rate": 2.2231098126930512e-05,
"loss": 7.4892,
"step": 438000
},
{
"epoch": 1.67,
"learning_rate": 2.2199398467258055e-05,
"loss": 7.4777,
"step": 438500
},
{
"epoch": 1.67,
"learning_rate": 2.2167698807585602e-05,
"loss": 7.5001,
"step": 439000
},
{
"epoch": 1.67,
"learning_rate": 2.213599914791315e-05,
"loss": 7.4948,
"step": 439500
},
{
"epoch": 1.67,
"learning_rate": 2.2104299488240696e-05,
"loss": 7.4712,
"step": 440000
},
{
"epoch": 1.68,
"learning_rate": 2.2072599828568242e-05,
"loss": 7.4989,
"step": 440500
},
{
"epoch": 1.68,
"learning_rate": 2.2040900168895786e-05,
"loss": 7.4885,
"step": 441000
},
{
"epoch": 1.68,
"learning_rate": 2.2009200509223336e-05,
"loss": 7.4871,
"step": 441500
},
{
"epoch": 1.68,
"learning_rate": 2.197750084955088e-05,
"loss": 7.4881,
"step": 442000
},
{
"epoch": 1.68,
"learning_rate": 2.1945801189878426e-05,
"loss": 7.4896,
"step": 442500
},
{
"epoch": 1.69,
"learning_rate": 2.1914101530205972e-05,
"loss": 7.4904,
"step": 443000
},
{
"epoch": 1.69,
"learning_rate": 2.188240187053352e-05,
"loss": 7.4898,
"step": 443500
},
{
"epoch": 1.69,
"learning_rate": 2.1850702210861066e-05,
"loss": 7.4957,
"step": 444000
},
{
"epoch": 1.69,
"learning_rate": 2.1819002551188612e-05,
"loss": 7.4865,
"step": 444500
},
{
"epoch": 1.69,
"learning_rate": 2.1787302891516156e-05,
"loss": 7.4915,
"step": 445000
},
{
"epoch": 1.69,
"learning_rate": 2.1755603231843706e-05,
"loss": 7.497,
"step": 445500
},
{
"epoch": 1.7,
"learning_rate": 2.172390357217125e-05,
"loss": 7.4874,
"step": 446000
},
{
"epoch": 1.7,
"learning_rate": 2.1692203912498796e-05,
"loss": 7.4772,
"step": 446500
},
{
"epoch": 1.7,
"learning_rate": 2.1660504252826343e-05,
"loss": 7.4974,
"step": 447000
},
{
"epoch": 1.7,
"learning_rate": 2.162880459315389e-05,
"loss": 7.4917,
"step": 447500
},
{
"epoch": 1.7,
"learning_rate": 2.1597104933481436e-05,
"loss": 7.4971,
"step": 448000
},
{
"epoch": 1.71,
"learning_rate": 2.156540527380898e-05,
"loss": 7.4751,
"step": 448500
},
{
"epoch": 1.71,
"learning_rate": 2.153370561413653e-05,
"loss": 7.4918,
"step": 449000
},
{
"epoch": 1.71,
"learning_rate": 2.1502005954464076e-05,
"loss": 7.4861,
"step": 449500
},
{
"epoch": 1.71,
"learning_rate": 2.147030629479162e-05,
"loss": 7.4963,
"step": 450000
},
{
"epoch": 1.71,
"learning_rate": 2.1438606635119166e-05,
"loss": 7.4781,
"step": 450500
},
{
"epoch": 1.72,
"learning_rate": 2.1406906975446713e-05,
"loss": 7.4913,
"step": 451000
},
{
"epoch": 1.72,
"learning_rate": 2.137520731577426e-05,
"loss": 7.4899,
"step": 451500
},
{
"epoch": 1.72,
"learning_rate": 2.1343507656101806e-05,
"loss": 7.4869,
"step": 452000
},
{
"epoch": 1.72,
"learning_rate": 2.131180799642935e-05,
"loss": 7.4908,
"step": 452500
},
{
"epoch": 1.72,
"learning_rate": 2.12801083367569e-05,
"loss": 7.4979,
"step": 453000
},
{
"epoch": 1.73,
"learning_rate": 2.1248408677084443e-05,
"loss": 7.4752,
"step": 453500
},
{
"epoch": 1.73,
"learning_rate": 2.121670901741199e-05,
"loss": 7.4942,
"step": 454000
},
{
"epoch": 1.73,
"learning_rate": 2.1185009357739536e-05,
"loss": 7.4794,
"step": 454500
},
{
"epoch": 1.73,
"learning_rate": 2.1153309698067083e-05,
"loss": 7.5003,
"step": 455000
},
{
"epoch": 1.73,
"learning_rate": 2.112161003839463e-05,
"loss": 7.4842,
"step": 455500
},
{
"epoch": 1.73,
"learning_rate": 2.1089910378722173e-05,
"loss": 7.49,
"step": 456000
},
{
"epoch": 1.74,
"learning_rate": 2.105821071904972e-05,
"loss": 7.4955,
"step": 456500
},
{
"epoch": 1.74,
"learning_rate": 2.102651105937727e-05,
"loss": 7.4922,
"step": 457000
},
{
"epoch": 1.74,
"learning_rate": 2.0994811399704813e-05,
"loss": 7.4992,
"step": 457500
},
{
"epoch": 1.74,
"learning_rate": 2.096311174003236e-05,
"loss": 7.4878,
"step": 458000
},
{
"epoch": 1.74,
"learning_rate": 2.0931412080359907e-05,
"loss": 7.4955,
"step": 458500
},
{
"epoch": 1.75,
"learning_rate": 2.0899712420687453e-05,
"loss": 7.4969,
"step": 459000
},
{
"epoch": 1.75,
"learning_rate": 2.0868012761015e-05,
"loss": 7.5065,
"step": 459500
},
{
"epoch": 1.75,
"learning_rate": 2.0836313101342543e-05,
"loss": 7.4951,
"step": 460000
},
{
"epoch": 1.75,
"learning_rate": 2.080461344167009e-05,
"loss": 7.485,
"step": 460500
},
{
"epoch": 1.75,
"learning_rate": 2.0772913781997637e-05,
"loss": 7.4797,
"step": 461000
},
{
"epoch": 1.76,
"learning_rate": 2.0741214122325183e-05,
"loss": 7.495,
"step": 461500
},
{
"epoch": 1.76,
"learning_rate": 2.070951446265273e-05,
"loss": 7.4678,
"step": 462000
},
{
"epoch": 1.76,
"learning_rate": 2.0677814802980274e-05,
"loss": 7.5105,
"step": 462500
},
{
"epoch": 1.76,
"learning_rate": 2.0646115143307824e-05,
"loss": 7.4975,
"step": 463000
},
{
"epoch": 1.76,
"learning_rate": 2.061441548363537e-05,
"loss": 7.483,
"step": 463500
},
{
"epoch": 1.77,
"learning_rate": 2.0582715823962914e-05,
"loss": 7.4876,
"step": 464000
},
{
"epoch": 1.77,
"learning_rate": 2.0551016164290464e-05,
"loss": 7.5046,
"step": 464500
},
{
"epoch": 1.77,
"learning_rate": 2.0519316504618007e-05,
"loss": 7.4868,
"step": 465000
},
{
"epoch": 1.77,
"learning_rate": 2.0487616844945554e-05,
"loss": 7.4981,
"step": 465500
},
{
"epoch": 1.77,
"learning_rate": 2.04559171852731e-05,
"loss": 7.4838,
"step": 466000
},
{
"epoch": 1.77,
"learning_rate": 2.0424217525600647e-05,
"loss": 7.4986,
"step": 466500
},
{
"epoch": 1.78,
"learning_rate": 2.0392517865928194e-05,
"loss": 7.4767,
"step": 467000
},
{
"epoch": 1.78,
"learning_rate": 2.0360818206255737e-05,
"loss": 7.4845,
"step": 467500
},
{
"epoch": 1.78,
"learning_rate": 2.0329118546583284e-05,
"loss": 7.4986,
"step": 468000
},
{
"epoch": 1.78,
"learning_rate": 2.029741888691083e-05,
"loss": 7.4984,
"step": 468500
},
{
"epoch": 1.78,
"learning_rate": 2.0265719227238377e-05,
"loss": 7.4851,
"step": 469000
},
{
"epoch": 1.79,
"learning_rate": 2.0234019567565924e-05,
"loss": 7.4876,
"step": 469500
},
{
"epoch": 1.79,
"learning_rate": 2.0202319907893467e-05,
"loss": 7.4906,
"step": 470000
},
{
"epoch": 1.79,
"learning_rate": 2.0170620248221017e-05,
"loss": 7.4916,
"step": 470500
},
{
"epoch": 1.79,
"learning_rate": 2.0138920588548564e-05,
"loss": 7.4783,
"step": 471000
},
{
"epoch": 1.79,
"learning_rate": 2.0107220928876107e-05,
"loss": 7.492,
"step": 471500
},
{
"epoch": 1.8,
"learning_rate": 2.0075521269203654e-05,
"loss": 7.4747,
"step": 472000
},
{
"epoch": 1.8,
"learning_rate": 2.00438216095312e-05,
"loss": 7.4935,
"step": 472500
},
{
"epoch": 1.8,
"learning_rate": 2.0012121949858748e-05,
"loss": 7.4952,
"step": 473000
},
{
"epoch": 1.8,
"learning_rate": 1.9980422290186294e-05,
"loss": 7.4861,
"step": 473500
},
{
"epoch": 1.8,
"learning_rate": 1.9948722630513838e-05,
"loss": 7.4757,
"step": 474000
},
{
"epoch": 1.8,
"learning_rate": 1.9917022970841388e-05,
"loss": 7.4784,
"step": 474500
},
{
"epoch": 1.81,
"learning_rate": 1.988532331116893e-05,
"loss": 7.5014,
"step": 475000
},
{
"epoch": 1.81,
"learning_rate": 1.9853623651496478e-05,
"loss": 7.4779,
"step": 475500
},
{
"epoch": 1.81,
"learning_rate": 1.9821923991824024e-05,
"loss": 7.4747,
"step": 476000
},
{
"epoch": 1.81,
"learning_rate": 1.979022433215157e-05,
"loss": 7.4924,
"step": 476500
},
{
"epoch": 1.81,
"learning_rate": 1.9758524672479118e-05,
"loss": 7.4931,
"step": 477000
},
{
"epoch": 1.82,
"learning_rate": 1.9726825012806664e-05,
"loss": 7.5003,
"step": 477500
},
{
"epoch": 1.82,
"learning_rate": 1.9695125353134208e-05,
"loss": 7.4917,
"step": 478000
},
{
"epoch": 1.82,
"learning_rate": 1.9663425693461758e-05,
"loss": 7.485,
"step": 478500
},
{
"epoch": 1.82,
"learning_rate": 1.96317260337893e-05,
"loss": 7.4869,
"step": 479000
},
{
"epoch": 1.82,
"learning_rate": 1.9600026374116848e-05,
"loss": 7.5071,
"step": 479500
},
{
"epoch": 1.83,
"learning_rate": 1.9568326714444395e-05,
"loss": 7.4769,
"step": 480000
},
{
"epoch": 1.83,
"learning_rate": 1.953662705477194e-05,
"loss": 7.4922,
"step": 480500
},
{
"epoch": 1.83,
"learning_rate": 1.9504927395099488e-05,
"loss": 7.4746,
"step": 481000
},
{
"epoch": 1.83,
"learning_rate": 1.947322773542703e-05,
"loss": 7.4716,
"step": 481500
},
{
"epoch": 1.83,
"learning_rate": 1.944152807575458e-05,
"loss": 7.4843,
"step": 482000
},
{
"epoch": 1.84,
"learning_rate": 1.9409828416082128e-05,
"loss": 7.4858,
"step": 482500
},
{
"epoch": 1.84,
"learning_rate": 1.937812875640967e-05,
"loss": 7.4732,
"step": 483000
},
{
"epoch": 1.84,
"learning_rate": 1.9346429096737218e-05,
"loss": 7.4873,
"step": 483500
},
{
"epoch": 1.84,
"learning_rate": 1.9314729437064765e-05,
"loss": 7.4883,
"step": 484000
},
{
"epoch": 1.84,
"learning_rate": 1.928302977739231e-05,
"loss": 7.4897,
"step": 484500
},
{
"epoch": 1.84,
"learning_rate": 1.9251330117719858e-05,
"loss": 7.4871,
"step": 485000
},
{
"epoch": 1.85,
"learning_rate": 1.92196304580474e-05,
"loss": 7.4895,
"step": 485500
},
{
"epoch": 1.85,
"learning_rate": 1.918793079837495e-05,
"loss": 7.5044,
"step": 486000
},
{
"epoch": 1.85,
"learning_rate": 1.9156231138702495e-05,
"loss": 7.4946,
"step": 486500
},
{
"epoch": 1.85,
"learning_rate": 1.912453147903004e-05,
"loss": 7.4789,
"step": 487000
},
{
"epoch": 1.85,
"learning_rate": 1.909283181935759e-05,
"loss": 7.48,
"step": 487500
},
{
"epoch": 1.86,
"learning_rate": 1.9061132159685135e-05,
"loss": 7.4858,
"step": 488000
},
{
"epoch": 1.86,
"learning_rate": 1.9029432500012682e-05,
"loss": 7.5018,
"step": 488500
},
{
"epoch": 1.86,
"learning_rate": 1.8997732840340225e-05,
"loss": 7.4792,
"step": 489000
},
{
"epoch": 1.86,
"learning_rate": 1.8966033180667772e-05,
"loss": 7.4872,
"step": 489500
},
{
"epoch": 1.86,
"learning_rate": 1.8934333520995322e-05,
"loss": 7.4953,
"step": 490000
},
{
"epoch": 1.87,
"learning_rate": 1.8902633861322865e-05,
"loss": 7.4795,
"step": 490500
},
{
"epoch": 1.87,
"learning_rate": 1.8870934201650412e-05,
"loss": 7.4935,
"step": 491000
},
{
"epoch": 1.87,
"learning_rate": 1.883923454197796e-05,
"loss": 7.4772,
"step": 491500
},
{
"epoch": 1.87,
"learning_rate": 1.8807534882305505e-05,
"loss": 7.4874,
"step": 492000
},
{
"epoch": 1.87,
"learning_rate": 1.8775835222633052e-05,
"loss": 7.4835,
"step": 492500
},
{
"epoch": 1.88,
"learning_rate": 1.8744135562960595e-05,
"loss": 7.4861,
"step": 493000
},
{
"epoch": 1.88,
"learning_rate": 1.8712435903288142e-05,
"loss": 7.49,
"step": 493500
},
{
"epoch": 1.88,
"learning_rate": 1.868073624361569e-05,
"loss": 7.4842,
"step": 494000
},
{
"epoch": 1.88,
"learning_rate": 1.8649036583943235e-05,
"loss": 7.4898,
"step": 494500
},
{
"epoch": 1.88,
"learning_rate": 1.8617336924270782e-05,
"loss": 7.5066,
"step": 495000
},
{
"epoch": 1.88,
"learning_rate": 1.858563726459833e-05,
"loss": 7.5039,
"step": 495500
},
{
"epoch": 1.89,
"learning_rate": 1.8553937604925876e-05,
"loss": 7.4901,
"step": 496000
},
{
"epoch": 1.89,
"learning_rate": 1.8522237945253422e-05,
"loss": 7.4683,
"step": 496500
},
{
"epoch": 1.89,
"learning_rate": 1.8490538285580966e-05,
"loss": 7.4971,
"step": 497000
},
{
"epoch": 1.89,
"learning_rate": 1.8458838625908516e-05,
"loss": 7.4787,
"step": 497500
},
{
"epoch": 1.89,
"learning_rate": 1.842713896623606e-05,
"loss": 7.4902,
"step": 498000
},
{
"epoch": 1.9,
"learning_rate": 1.8395439306563606e-05,
"loss": 7.4826,
"step": 498500
},
{
"epoch": 1.9,
"learning_rate": 1.8363739646891152e-05,
"loss": 7.4757,
"step": 499000
},
{
"epoch": 1.9,
"learning_rate": 1.83320399872187e-05,
"loss": 7.4912,
"step": 499500
},
{
"epoch": 1.9,
"learning_rate": 1.8300340327546246e-05,
"loss": 7.4803,
"step": 500000
},
{
"epoch": 1.9,
"learning_rate": 1.826864066787379e-05,
"loss": 7.4893,
"step": 500500
},
{
"epoch": 1.91,
"learning_rate": 1.8236941008201336e-05,
"loss": 7.4932,
"step": 501000
},
{
"epoch": 1.91,
"learning_rate": 1.8205241348528883e-05,
"loss": 7.4942,
"step": 501500
},
{
"epoch": 1.91,
"learning_rate": 1.817354168885643e-05,
"loss": 7.4846,
"step": 502000
},
{
"epoch": 1.91,
"learning_rate": 1.8141842029183976e-05,
"loss": 7.501,
"step": 502500
},
{
"epoch": 1.91,
"learning_rate": 1.811014236951152e-05,
"loss": 7.4974,
"step": 503000
},
{
"epoch": 1.92,
"learning_rate": 1.807844270983907e-05,
"loss": 7.485,
"step": 503500
},
{
"epoch": 1.92,
"learning_rate": 1.8046743050166616e-05,
"loss": 7.4922,
"step": 504000
},
{
"epoch": 1.92,
"learning_rate": 1.801504339049416e-05,
"loss": 7.4862,
"step": 504500
},
{
"epoch": 1.92,
"learning_rate": 1.7983343730821706e-05,
"loss": 7.4751,
"step": 505000
},
{
"epoch": 1.92,
"learning_rate": 1.7951644071149253e-05,
"loss": 7.4911,
"step": 505500
},
{
"epoch": 1.92,
"learning_rate": 1.79199444114768e-05,
"loss": 7.4885,
"step": 506000
},
{
"epoch": 1.93,
"learning_rate": 1.7888244751804346e-05,
"loss": 7.4996,
"step": 506500
},
{
"epoch": 1.93,
"learning_rate": 1.785654509213189e-05,
"loss": 7.4842,
"step": 507000
},
{
"epoch": 1.93,
"learning_rate": 1.782484543245944e-05,
"loss": 7.482,
"step": 507500
},
{
"epoch": 1.93,
"learning_rate": 1.7793145772786983e-05,
"loss": 7.5013,
"step": 508000
},
{
"epoch": 1.93,
"learning_rate": 1.776144611311453e-05,
"loss": 7.4943,
"step": 508500
},
{
"epoch": 1.94,
"learning_rate": 1.7729746453442076e-05,
"loss": 7.4914,
"step": 509000
},
{
"epoch": 1.94,
"learning_rate": 1.7698046793769623e-05,
"loss": 7.4987,
"step": 509500
},
{
"epoch": 1.94,
"learning_rate": 1.766634713409717e-05,
"loss": 7.4847,
"step": 510000
},
{
"epoch": 1.94,
"learning_rate": 1.7634647474424716e-05,
"loss": 7.4991,
"step": 510500
},
{
"epoch": 1.94,
"learning_rate": 1.760294781475226e-05,
"loss": 7.5086,
"step": 511000
},
{
"epoch": 1.95,
"learning_rate": 1.757124815507981e-05,
"loss": 7.4931,
"step": 511500
},
{
"epoch": 1.95,
"learning_rate": 1.7539548495407353e-05,
"loss": 7.4879,
"step": 512000
},
{
"epoch": 1.95,
"learning_rate": 1.75078488357349e-05,
"loss": 7.4858,
"step": 512500
},
{
"epoch": 1.95,
"learning_rate": 1.7476149176062447e-05,
"loss": 7.4899,
"step": 513000
},
{
"epoch": 1.95,
"learning_rate": 1.7444449516389993e-05,
"loss": 7.4803,
"step": 513500
},
{
"epoch": 1.96,
"learning_rate": 1.741274985671754e-05,
"loss": 7.4752,
"step": 514000
},
{
"epoch": 1.96,
"learning_rate": 1.7381050197045083e-05,
"loss": 7.4754,
"step": 514500
},
{
"epoch": 1.96,
"learning_rate": 1.7349350537372633e-05,
"loss": 7.4866,
"step": 515000
},
{
"epoch": 1.96,
"learning_rate": 1.7317650877700177e-05,
"loss": 7.4782,
"step": 515500
},
{
"epoch": 1.96,
"learning_rate": 1.7285951218027723e-05,
"loss": 7.4994,
"step": 516000
},
{
"epoch": 1.96,
"learning_rate": 1.725425155835527e-05,
"loss": 7.4918,
"step": 516500
},
{
"epoch": 1.97,
"learning_rate": 1.7222551898682817e-05,
"loss": 7.4669,
"step": 517000
},
{
"epoch": 1.97,
"learning_rate": 1.7190852239010364e-05,
"loss": 7.4877,
"step": 517500
},
{
"epoch": 1.97,
"learning_rate": 1.715915257933791e-05,
"loss": 7.4764,
"step": 518000
},
{
"epoch": 1.97,
"learning_rate": 1.7127452919665454e-05,
"loss": 7.4781,
"step": 518500
},
{
"epoch": 1.97,
"learning_rate": 1.7095753259993004e-05,
"loss": 7.486,
"step": 519000
},
{
"epoch": 1.98,
"learning_rate": 1.7064053600320547e-05,
"loss": 7.4951,
"step": 519500
},
{
"epoch": 1.98,
"learning_rate": 1.7032353940648094e-05,
"loss": 7.4638,
"step": 520000
},
{
"epoch": 1.98,
"learning_rate": 1.700065428097564e-05,
"loss": 7.4869,
"step": 520500
},
{
"epoch": 1.98,
"learning_rate": 1.6968954621303187e-05,
"loss": 7.4846,
"step": 521000
},
{
"epoch": 1.98,
"learning_rate": 1.6937254961630734e-05,
"loss": 7.4765,
"step": 521500
},
{
"epoch": 1.99,
"learning_rate": 1.6905555301958277e-05,
"loss": 7.5108,
"step": 522000
},
{
"epoch": 1.99,
"learning_rate": 1.6873855642285824e-05,
"loss": 7.4799,
"step": 522500
},
{
"epoch": 1.99,
"learning_rate": 1.6842155982613374e-05,
"loss": 7.4881,
"step": 523000
},
{
"epoch": 1.99,
"learning_rate": 1.6810456322940917e-05,
"loss": 7.4677,
"step": 523500
},
{
"epoch": 1.99,
"learning_rate": 1.6778756663268464e-05,
"loss": 7.4847,
"step": 524000
},
{
"epoch": 2.0,
"learning_rate": 1.674705700359601e-05,
"loss": 7.4958,
"step": 524500
},
{
"epoch": 2.0,
"learning_rate": 1.6715357343923557e-05,
"loss": 7.4829,
"step": 525000
},
{
"epoch": 2.0,
"learning_rate": 1.6683657684251104e-05,
"loss": 7.4905,
"step": 525500
},
{
"epoch": 2.0,
"learning_rate": 1.6651958024578647e-05,
"loss": 7.4893,
"step": 526000
},
{
"epoch": 2.0,
"learning_rate": 1.6620258364906194e-05,
"loss": 7.4913,
"step": 526500
},
{
"epoch": 2.0,
"learning_rate": 1.658855870523374e-05,
"loss": 7.495,
"step": 527000
},
{
"epoch": 2.01,
"learning_rate": 1.6556859045561287e-05,
"loss": 7.4846,
"step": 527500
},
{
"epoch": 2.01,
"learning_rate": 1.6525159385888834e-05,
"loss": 7.4968,
"step": 528000
},
{
"epoch": 2.01,
"learning_rate": 1.649345972621638e-05,
"loss": 7.4936,
"step": 528500
},
{
"epoch": 2.01,
"learning_rate": 1.6461760066543928e-05,
"loss": 7.488,
"step": 529000
},
{
"epoch": 2.01,
"learning_rate": 1.643006040687147e-05,
"loss": 7.4935,
"step": 529500
},
{
"epoch": 2.02,
"learning_rate": 1.6398360747199018e-05,
"loss": 7.485,
"step": 530000
},
{
"epoch": 2.02,
"learning_rate": 1.6366661087526568e-05,
"loss": 7.4838,
"step": 530500
},
{
"epoch": 2.02,
"learning_rate": 1.633496142785411e-05,
"loss": 7.5033,
"step": 531000
},
{
"epoch": 2.02,
"learning_rate": 1.6303261768181658e-05,
"loss": 7.5058,
"step": 531500
},
{
"epoch": 2.02,
"learning_rate": 1.6271562108509204e-05,
"loss": 7.4735,
"step": 532000
},
{
"epoch": 2.03,
"learning_rate": 1.623986244883675e-05,
"loss": 7.5094,
"step": 532500
},
{
"epoch": 2.03,
"learning_rate": 1.6208162789164298e-05,
"loss": 7.468,
"step": 533000
},
{
"epoch": 2.03,
"learning_rate": 1.617646312949184e-05,
"loss": 7.4916,
"step": 533500
},
{
"epoch": 2.03,
"learning_rate": 1.6144763469819388e-05,
"loss": 7.4981,
"step": 534000
},
{
"epoch": 2.03,
"learning_rate": 1.6113063810146935e-05,
"loss": 7.4882,
"step": 534500
},
{
"epoch": 2.04,
"learning_rate": 1.608136415047448e-05,
"loss": 7.488,
"step": 535000
},
{
"epoch": 2.04,
"learning_rate": 1.6049664490802028e-05,
"loss": 7.4897,
"step": 535500
},
{
"epoch": 2.04,
"learning_rate": 1.601796483112957e-05,
"loss": 7.4763,
"step": 536000
},
{
"epoch": 2.04,
"learning_rate": 1.598626517145712e-05,
"loss": 7.4969,
"step": 536500
},
{
"epoch": 2.04,
"learning_rate": 1.5954565511784668e-05,
"loss": 7.4741,
"step": 537000
},
{
"epoch": 2.04,
"learning_rate": 1.592286585211221e-05,
"loss": 7.4865,
"step": 537500
},
{
"epoch": 2.05,
"learning_rate": 1.5891166192439758e-05,
"loss": 7.4717,
"step": 538000
},
{
"epoch": 2.05,
"learning_rate": 1.5859466532767305e-05,
"loss": 7.496,
"step": 538500
},
{
"epoch": 2.05,
"learning_rate": 1.582776687309485e-05,
"loss": 7.4946,
"step": 539000
},
{
"epoch": 2.05,
"learning_rate": 1.5796067213422398e-05,
"loss": 7.4919,
"step": 539500
},
{
"epoch": 2.05,
"learning_rate": 1.576436755374994e-05,
"loss": 7.4883,
"step": 540000
},
{
"epoch": 2.06,
"learning_rate": 1.573266789407749e-05,
"loss": 7.4776,
"step": 540500
},
{
"epoch": 2.06,
"learning_rate": 1.5700968234405035e-05,
"loss": 7.4971,
"step": 541000
},
{
"epoch": 2.06,
"learning_rate": 1.566926857473258e-05,
"loss": 7.4961,
"step": 541500
},
{
"epoch": 2.06,
"learning_rate": 1.563756891506013e-05,
"loss": 7.4906,
"step": 542000
},
{
"epoch": 2.06,
"learning_rate": 1.5605869255387675e-05,
"loss": 7.5022,
"step": 542500
},
{
"epoch": 2.07,
"learning_rate": 1.5574169595715222e-05,
"loss": 7.4833,
"step": 543000
},
{
"epoch": 2.07,
"learning_rate": 1.5542469936042765e-05,
"loss": 7.4884,
"step": 543500
},
{
"epoch": 2.07,
"learning_rate": 1.5510770276370315e-05,
"loss": 7.4796,
"step": 544000
},
{
"epoch": 2.07,
"learning_rate": 1.5479070616697862e-05,
"loss": 7.4927,
"step": 544500
},
{
"epoch": 2.07,
"learning_rate": 1.5447370957025405e-05,
"loss": 7.478,
"step": 545000
},
{
"epoch": 2.08,
"learning_rate": 1.5415671297352952e-05,
"loss": 7.4795,
"step": 545500
},
{
"epoch": 2.08,
"learning_rate": 1.53839716376805e-05,
"loss": 7.4672,
"step": 546000
},
{
"epoch": 2.08,
"learning_rate": 1.5352271978008045e-05,
"loss": 7.4848,
"step": 546500
},
{
"epoch": 2.08,
"learning_rate": 1.5320572318335592e-05,
"loss": 7.4975,
"step": 547000
},
{
"epoch": 2.08,
"learning_rate": 1.5288872658663135e-05,
"loss": 7.486,
"step": 547500
},
{
"epoch": 2.08,
"learning_rate": 1.5257172998990685e-05,
"loss": 7.5042,
"step": 548000
},
{
"epoch": 2.09,
"learning_rate": 1.522547333931823e-05,
"loss": 7.4824,
"step": 548500
},
{
"epoch": 2.09,
"learning_rate": 1.5193773679645775e-05,
"loss": 7.4905,
"step": 549000
},
{
"epoch": 2.09,
"learning_rate": 1.5162074019973322e-05,
"loss": 7.4875,
"step": 549500
},
{
"epoch": 2.09,
"learning_rate": 1.5130374360300869e-05,
"loss": 7.4709,
"step": 550000
},
{
"epoch": 2.09,
"learning_rate": 1.5098674700628416e-05,
"loss": 7.484,
"step": 550500
},
{
"epoch": 2.1,
"learning_rate": 1.506697504095596e-05,
"loss": 7.4856,
"step": 551000
},
{
"epoch": 2.1,
"learning_rate": 1.5035275381283506e-05,
"loss": 7.4796,
"step": 551500
},
{
"epoch": 2.1,
"learning_rate": 1.5003575721611054e-05,
"loss": 7.4841,
"step": 552000
},
{
"epoch": 2.1,
"learning_rate": 1.49718760619386e-05,
"loss": 7.4878,
"step": 552500
},
{
"epoch": 2.1,
"learning_rate": 1.4940176402266146e-05,
"loss": 7.5088,
"step": 553000
},
{
"epoch": 2.11,
"learning_rate": 1.490847674259369e-05,
"loss": 7.4853,
"step": 553500
},
{
"epoch": 2.11,
"learning_rate": 1.4876777082921239e-05,
"loss": 7.4974,
"step": 554000
},
{
"epoch": 2.11,
"learning_rate": 1.4845077423248784e-05,
"loss": 7.4835,
"step": 554500
},
{
"epoch": 2.11,
"learning_rate": 1.481337776357633e-05,
"loss": 7.4858,
"step": 555000
},
{
"epoch": 2.11,
"learning_rate": 1.4781678103903876e-05,
"loss": 7.4657,
"step": 555500
},
{
"epoch": 2.12,
"learning_rate": 1.4749978444231424e-05,
"loss": 7.487,
"step": 556000
},
{
"epoch": 2.12,
"learning_rate": 1.471827878455897e-05,
"loss": 7.4915,
"step": 556500
},
{
"epoch": 2.12,
"learning_rate": 1.4686579124886516e-05,
"loss": 7.4858,
"step": 557000
},
{
"epoch": 2.12,
"learning_rate": 1.4654879465214061e-05,
"loss": 7.4788,
"step": 557500
},
{
"epoch": 2.12,
"learning_rate": 1.462317980554161e-05,
"loss": 7.4807,
"step": 558000
},
{
"epoch": 2.12,
"learning_rate": 1.4591480145869154e-05,
"loss": 7.4823,
"step": 558500
},
{
"epoch": 2.13,
"learning_rate": 1.4559780486196701e-05,
"loss": 7.4924,
"step": 559000
},
{
"epoch": 2.13,
"learning_rate": 1.4528080826524248e-05,
"loss": 7.4928,
"step": 559500
},
{
"epoch": 2.13,
"learning_rate": 1.4496381166851794e-05,
"loss": 7.4887,
"step": 560000
},
{
"epoch": 2.13,
"learning_rate": 1.446468150717934e-05,
"loss": 7.487,
"step": 560500
},
{
"epoch": 2.13,
"learning_rate": 1.4432981847506884e-05,
"loss": 7.4855,
"step": 561000
},
{
"epoch": 2.14,
"learning_rate": 1.4401282187834433e-05,
"loss": 7.4952,
"step": 561500
},
{
"epoch": 2.14,
"learning_rate": 1.436958252816198e-05,
"loss": 7.494,
"step": 562000
},
{
"epoch": 2.14,
"learning_rate": 1.4337882868489525e-05,
"loss": 7.4929,
"step": 562500
},
{
"epoch": 2.14,
"learning_rate": 1.430618320881707e-05,
"loss": 7.4806,
"step": 563000
},
{
"epoch": 2.14,
"learning_rate": 1.4274483549144618e-05,
"loss": 7.4835,
"step": 563500
},
{
"epoch": 2.15,
"learning_rate": 1.4242783889472163e-05,
"loss": 7.4992,
"step": 564000
},
{
"epoch": 2.15,
"learning_rate": 1.421108422979971e-05,
"loss": 7.4782,
"step": 564500
},
{
"epoch": 2.15,
"learning_rate": 1.4179384570127255e-05,
"loss": 7.4948,
"step": 565000
},
{
"epoch": 2.15,
"learning_rate": 1.4147684910454803e-05,
"loss": 7.4824,
"step": 565500
},
{
"epoch": 2.15,
"learning_rate": 1.4115985250782348e-05,
"loss": 7.4814,
"step": 566000
},
{
"epoch": 2.15,
"learning_rate": 1.4084285591109895e-05,
"loss": 7.4857,
"step": 566500
},
{
"epoch": 2.16,
"learning_rate": 1.405258593143744e-05,
"loss": 7.4909,
"step": 567000
},
{
"epoch": 2.16,
"learning_rate": 1.4020886271764988e-05,
"loss": 7.4852,
"step": 567500
},
{
"epoch": 2.16,
"learning_rate": 1.3989186612092533e-05,
"loss": 7.4942,
"step": 568000
},
{
"epoch": 2.16,
"learning_rate": 1.3957486952420078e-05,
"loss": 7.4776,
"step": 568500
},
{
"epoch": 2.16,
"learning_rate": 1.3925787292747625e-05,
"loss": 7.4865,
"step": 569000
},
{
"epoch": 2.17,
"learning_rate": 1.3894087633075173e-05,
"loss": 7.479,
"step": 569500
},
{
"epoch": 2.17,
"learning_rate": 1.3862387973402718e-05,
"loss": 7.4894,
"step": 570000
},
{
"epoch": 2.17,
"learning_rate": 1.3830688313730263e-05,
"loss": 7.4859,
"step": 570500
},
{
"epoch": 2.17,
"learning_rate": 1.379898865405781e-05,
"loss": 7.5113,
"step": 571000
},
{
"epoch": 2.17,
"learning_rate": 1.3767288994385358e-05,
"loss": 7.4906,
"step": 571500
},
{
"epoch": 2.18,
"learning_rate": 1.3735589334712903e-05,
"loss": 7.4839,
"step": 572000
},
{
"epoch": 2.18,
"learning_rate": 1.3703889675040448e-05,
"loss": 7.4801,
"step": 572500
},
{
"epoch": 2.18,
"learning_rate": 1.3672190015367995e-05,
"loss": 7.4786,
"step": 573000
},
{
"epoch": 2.18,
"learning_rate": 1.3640490355695542e-05,
"loss": 7.4568,
"step": 573500
},
{
"epoch": 2.18,
"learning_rate": 1.3608790696023089e-05,
"loss": 7.4822,
"step": 574000
},
{
"epoch": 2.19,
"learning_rate": 1.3577091036350634e-05,
"loss": 7.5005,
"step": 574500
},
{
"epoch": 2.19,
"learning_rate": 1.3545391376678179e-05,
"loss": 7.4834,
"step": 575000
},
{
"epoch": 2.19,
"learning_rate": 1.3513691717005727e-05,
"loss": 7.4912,
"step": 575500
},
{
"epoch": 2.19,
"learning_rate": 1.3481992057333274e-05,
"loss": 7.489,
"step": 576000
},
{
"epoch": 2.19,
"learning_rate": 1.3450292397660819e-05,
"loss": 7.4785,
"step": 576500
},
{
"epoch": 2.19,
"learning_rate": 1.3418592737988367e-05,
"loss": 7.4709,
"step": 577000
},
{
"epoch": 2.2,
"learning_rate": 1.3386893078315912e-05,
"loss": 7.5052,
"step": 577500
},
{
"epoch": 2.2,
"learning_rate": 1.3355193418643457e-05,
"loss": 7.4848,
"step": 578000
},
{
"epoch": 2.2,
"learning_rate": 1.3323493758971004e-05,
"loss": 7.4859,
"step": 578500
},
{
"epoch": 2.2,
"learning_rate": 1.3291794099298552e-05,
"loss": 7.4929,
"step": 579000
},
{
"epoch": 2.2,
"learning_rate": 1.3260094439626097e-05,
"loss": 7.4868,
"step": 579500
},
{
"epoch": 2.21,
"learning_rate": 1.3228394779953642e-05,
"loss": 7.4788,
"step": 580000
},
{
"epoch": 2.21,
"learning_rate": 1.3196695120281189e-05,
"loss": 7.4951,
"step": 580500
},
{
"epoch": 2.21,
"learning_rate": 1.3164995460608737e-05,
"loss": 7.4951,
"step": 581000
},
{
"epoch": 2.21,
"learning_rate": 1.3133295800936282e-05,
"loss": 7.4861,
"step": 581500
},
{
"epoch": 2.21,
"learning_rate": 1.3101596141263827e-05,
"loss": 7.4999,
"step": 582000
},
{
"epoch": 2.22,
"learning_rate": 1.3069896481591372e-05,
"loss": 7.4914,
"step": 582500
},
{
"epoch": 2.22,
"learning_rate": 1.303819682191892e-05,
"loss": 7.49,
"step": 583000
},
{
"epoch": 2.22,
"learning_rate": 1.3006497162246468e-05,
"loss": 7.4875,
"step": 583500
},
{
"epoch": 2.22,
"learning_rate": 1.2974797502574013e-05,
"loss": 7.5048,
"step": 584000
},
{
"epoch": 2.22,
"learning_rate": 1.2943097842901558e-05,
"loss": 7.4962,
"step": 584500
},
{
"epoch": 2.23,
"learning_rate": 1.2911398183229106e-05,
"loss": 7.4774,
"step": 585000
},
{
"epoch": 2.23,
"learning_rate": 1.2879698523556653e-05,
"loss": 7.4836,
"step": 585500
},
{
"epoch": 2.23,
"learning_rate": 1.2847998863884198e-05,
"loss": 7.502,
"step": 586000
},
{
"epoch": 2.23,
"learning_rate": 1.2816299204211743e-05,
"loss": 7.4826,
"step": 586500
},
{
"epoch": 2.23,
"learning_rate": 1.2784599544539291e-05,
"loss": 7.4802,
"step": 587000
},
{
"epoch": 2.23,
"learning_rate": 1.2752899884866836e-05,
"loss": 7.4735,
"step": 587500
},
{
"epoch": 2.24,
"learning_rate": 1.2721200225194383e-05,
"loss": 7.4813,
"step": 588000
},
{
"epoch": 2.24,
"learning_rate": 1.2689500565521928e-05,
"loss": 7.4912,
"step": 588500
},
{
"epoch": 2.24,
"learning_rate": 1.2657800905849476e-05,
"loss": 7.483,
"step": 589000
},
{
"epoch": 2.24,
"learning_rate": 1.2626101246177021e-05,
"loss": 7.4972,
"step": 589500
},
{
"epoch": 2.24,
"learning_rate": 1.2594401586504568e-05,
"loss": 7.4872,
"step": 590000
},
{
"epoch": 2.25,
"learning_rate": 1.2562701926832113e-05,
"loss": 7.4886,
"step": 590500
},
{
"epoch": 2.25,
"learning_rate": 1.2531002267159661e-05,
"loss": 7.4957,
"step": 591000
},
{
"epoch": 2.25,
"learning_rate": 1.2499302607487206e-05,
"loss": 7.495,
"step": 591500
},
{
"epoch": 2.25,
"learning_rate": 1.2467602947814753e-05,
"loss": 7.472,
"step": 592000
},
{
"epoch": 2.25,
"learning_rate": 1.24359032881423e-05,
"loss": 7.485,
"step": 592500
},
{
"epoch": 2.26,
"learning_rate": 1.2404203628469846e-05,
"loss": 7.4938,
"step": 593000
},
{
"epoch": 2.26,
"learning_rate": 1.2372503968797391e-05,
"loss": 7.4972,
"step": 593500
},
{
"epoch": 2.26,
"learning_rate": 1.2340804309124938e-05,
"loss": 7.485,
"step": 594000
},
{
"epoch": 2.26,
"learning_rate": 1.2309104649452483e-05,
"loss": 7.4855,
"step": 594500
},
{
"epoch": 2.26,
"learning_rate": 1.2277404989780032e-05,
"loss": 7.488,
"step": 595000
},
{
"epoch": 2.27,
"learning_rate": 1.2245705330107577e-05,
"loss": 7.4869,
"step": 595500
},
{
"epoch": 2.27,
"learning_rate": 1.2214005670435123e-05,
"loss": 7.4852,
"step": 596000
},
{
"epoch": 2.27,
"learning_rate": 1.2182306010762668e-05,
"loss": 7.4858,
"step": 596500
},
{
"epoch": 2.27,
"learning_rate": 1.2150606351090215e-05,
"loss": 7.4779,
"step": 597000
},
{
"epoch": 2.27,
"learning_rate": 1.2118906691417762e-05,
"loss": 7.4977,
"step": 597500
},
{
"epoch": 2.27,
"learning_rate": 1.2087207031745308e-05,
"loss": 7.4837,
"step": 598000
},
{
"epoch": 2.28,
"learning_rate": 1.2055507372072853e-05,
"loss": 7.4878,
"step": 598500
},
{
"epoch": 2.28,
"learning_rate": 1.20238077124004e-05,
"loss": 7.5044,
"step": 599000
},
{
"epoch": 2.28,
"learning_rate": 1.1992108052727947e-05,
"loss": 7.4806,
"step": 599500
},
{
"epoch": 2.28,
"learning_rate": 1.1960408393055494e-05,
"loss": 7.4817,
"step": 600000
},
{
"epoch": 2.28,
"learning_rate": 1.1928708733383039e-05,
"loss": 7.4771,
"step": 600500
},
{
"epoch": 2.29,
"learning_rate": 1.1897009073710585e-05,
"loss": 7.4823,
"step": 601000
},
{
"epoch": 2.29,
"learning_rate": 1.186530941403813e-05,
"loss": 7.4791,
"step": 601500
},
{
"epoch": 2.29,
"learning_rate": 1.1833609754365679e-05,
"loss": 7.5043,
"step": 602000
},
{
"epoch": 2.29,
"learning_rate": 1.1801910094693224e-05,
"loss": 7.493,
"step": 602500
},
{
"epoch": 2.29,
"learning_rate": 1.177021043502077e-05,
"loss": 7.4888,
"step": 603000
},
{
"epoch": 2.3,
"learning_rate": 1.1738510775348315e-05,
"loss": 7.4986,
"step": 603500
},
{
"epoch": 2.3,
"learning_rate": 1.1706811115675862e-05,
"loss": 7.4765,
"step": 604000
},
{
"epoch": 2.3,
"learning_rate": 1.1675111456003409e-05,
"loss": 7.4994,
"step": 604500
},
{
"epoch": 2.3,
"learning_rate": 1.1643411796330955e-05,
"loss": 7.4915,
"step": 605000
},
{
"epoch": 2.3,
"learning_rate": 1.16117121366585e-05,
"loss": 7.4944,
"step": 605500
},
{
"epoch": 2.31,
"learning_rate": 1.1580012476986047e-05,
"loss": 7.4793,
"step": 606000
},
{
"epoch": 2.31,
"learning_rate": 1.1548312817313594e-05,
"loss": 7.4996,
"step": 606500
},
{
"epoch": 2.31,
"learning_rate": 1.151661315764114e-05,
"loss": 7.4913,
"step": 607000
},
{
"epoch": 2.31,
"learning_rate": 1.1484913497968687e-05,
"loss": 7.4759,
"step": 607500
},
{
"epoch": 2.31,
"learning_rate": 1.1453213838296232e-05,
"loss": 7.4876,
"step": 608000
},
{
"epoch": 2.31,
"learning_rate": 1.1421514178623779e-05,
"loss": 7.4641,
"step": 608500
},
{
"epoch": 2.32,
"learning_rate": 1.1389814518951326e-05,
"loss": 7.4921,
"step": 609000
},
{
"epoch": 2.32,
"learning_rate": 1.1358114859278872e-05,
"loss": 7.4813,
"step": 609500
},
{
"epoch": 2.32,
"learning_rate": 1.1326415199606417e-05,
"loss": 7.5045,
"step": 610000
},
{
"epoch": 2.32,
"learning_rate": 1.1294715539933964e-05,
"loss": 7.4985,
"step": 610500
},
{
"epoch": 2.32,
"learning_rate": 1.1263015880261509e-05,
"loss": 7.4815,
"step": 611000
},
{
"epoch": 2.33,
"learning_rate": 1.1231316220589058e-05,
"loss": 7.4762,
"step": 611500
},
{
"epoch": 2.33,
"learning_rate": 1.1199616560916603e-05,
"loss": 7.4714,
"step": 612000
},
{
"epoch": 2.33,
"learning_rate": 1.116791690124415e-05,
"loss": 7.5055,
"step": 612500
},
{
"epoch": 2.33,
"learning_rate": 1.1136217241571694e-05,
"loss": 7.5009,
"step": 613000
},
{
"epoch": 2.33,
"learning_rate": 1.1104517581899241e-05,
"loss": 7.4863,
"step": 613500
},
{
"epoch": 2.34,
"learning_rate": 1.1072817922226788e-05,
"loss": 7.4795,
"step": 614000
},
{
"epoch": 2.34,
"learning_rate": 1.1041118262554334e-05,
"loss": 7.4836,
"step": 614500
},
{
"epoch": 2.34,
"learning_rate": 1.100941860288188e-05,
"loss": 7.5069,
"step": 615000
},
{
"epoch": 2.34,
"learning_rate": 1.0977718943209426e-05,
"loss": 7.495,
"step": 615500
},
{
"epoch": 2.34,
"learning_rate": 1.0946019283536973e-05,
"loss": 7.4857,
"step": 616000
},
{
"epoch": 2.35,
"learning_rate": 1.091431962386452e-05,
"loss": 7.4908,
"step": 616500
},
{
"epoch": 2.35,
"learning_rate": 1.0882619964192065e-05,
"loss": 7.482,
"step": 617000
},
{
"epoch": 2.35,
"learning_rate": 1.0850920304519611e-05,
"loss": 7.4732,
"step": 617500
},
{
"epoch": 2.35,
"learning_rate": 1.0819220644847156e-05,
"loss": 7.4855,
"step": 618000
},
{
"epoch": 2.35,
"learning_rate": 1.0787520985174705e-05,
"loss": 7.4918,
"step": 618500
},
{
"epoch": 2.35,
"learning_rate": 1.075582132550225e-05,
"loss": 7.4965,
"step": 619000
},
{
"epoch": 2.36,
"learning_rate": 1.0724121665829796e-05,
"loss": 7.4942,
"step": 619500
},
{
"epoch": 2.36,
"learning_rate": 1.0692422006157341e-05,
"loss": 7.4977,
"step": 620000
},
{
"epoch": 2.36,
"learning_rate": 1.0660722346484888e-05,
"loss": 7.4783,
"step": 620500
},
{
"epoch": 2.36,
"learning_rate": 1.0629022686812435e-05,
"loss": 7.4989,
"step": 621000
},
{
"epoch": 2.36,
"learning_rate": 1.0597323027139981e-05,
"loss": 7.4745,
"step": 621500
},
{
"epoch": 2.37,
"learning_rate": 1.0565623367467526e-05,
"loss": 7.4794,
"step": 622000
},
{
"epoch": 2.37,
"learning_rate": 1.0533923707795073e-05,
"loss": 7.4847,
"step": 622500
},
{
"epoch": 2.37,
"learning_rate": 1.050222404812262e-05,
"loss": 7.4982,
"step": 623000
},
{
"epoch": 2.37,
"learning_rate": 1.0470524388450167e-05,
"loss": 7.503,
"step": 623500
},
{
"epoch": 2.37,
"learning_rate": 1.0438824728777713e-05,
"loss": 7.4928,
"step": 624000
},
{
"epoch": 2.38,
"learning_rate": 1.0407125069105258e-05,
"loss": 7.479,
"step": 624500
},
{
"epoch": 2.38,
"learning_rate": 1.0375425409432805e-05,
"loss": 7.4936,
"step": 625000
},
{
"epoch": 2.38,
"learning_rate": 1.0343725749760352e-05,
"loss": 7.4868,
"step": 625500
},
{
"epoch": 2.38,
"learning_rate": 1.0312026090087898e-05,
"loss": 7.4826,
"step": 626000
},
{
"epoch": 2.38,
"learning_rate": 1.0280326430415443e-05,
"loss": 7.4834,
"step": 626500
},
{
"epoch": 2.39,
"learning_rate": 1.024862677074299e-05,
"loss": 7.4945,
"step": 627000
},
{
"epoch": 2.39,
"learning_rate": 1.0216927111070535e-05,
"loss": 7.4836,
"step": 627500
},
{
"epoch": 2.39,
"learning_rate": 1.0185227451398084e-05,
"loss": 7.4862,
"step": 628000
},
{
"epoch": 2.39,
"learning_rate": 1.0153527791725629e-05,
"loss": 7.4984,
"step": 628500
},
{
"epoch": 2.39,
"learning_rate": 1.0121828132053175e-05,
"loss": 7.4911,
"step": 629000
},
{
"epoch": 2.39,
"learning_rate": 1.009012847238072e-05,
"loss": 7.4969,
"step": 629500
},
{
"epoch": 2.4,
"learning_rate": 1.0058428812708267e-05,
"loss": 7.482,
"step": 630000
},
{
"epoch": 2.4,
"learning_rate": 1.0026729153035814e-05,
"loss": 7.4814,
"step": 630500
},
{
"epoch": 2.4,
"learning_rate": 9.99502949336336e-06,
"loss": 7.498,
"step": 631000
},
{
"epoch": 2.4,
"learning_rate": 9.963329833690905e-06,
"loss": 7.4941,
"step": 631500
},
{
"epoch": 2.4,
"learning_rate": 9.931630174018452e-06,
"loss": 7.5052,
"step": 632000
},
{
"epoch": 2.41,
"learning_rate": 9.899930514345999e-06,
"loss": 7.4824,
"step": 632500
},
{
"epoch": 2.41,
"learning_rate": 9.868230854673545e-06,
"loss": 7.4937,
"step": 633000
},
{
"epoch": 2.41,
"learning_rate": 9.83653119500109e-06,
"loss": 7.5007,
"step": 633500
},
{
"epoch": 2.41,
"learning_rate": 9.804831535328637e-06,
"loss": 7.4825,
"step": 634000
},
{
"epoch": 2.41,
"learning_rate": 9.773131875656182e-06,
"loss": 7.4892,
"step": 634500
},
{
"epoch": 2.42,
"learning_rate": 9.74143221598373e-06,
"loss": 7.4957,
"step": 635000
},
{
"epoch": 2.42,
"learning_rate": 9.709732556311276e-06,
"loss": 7.4823,
"step": 635500
},
{
"epoch": 2.42,
"learning_rate": 9.678032896638822e-06,
"loss": 7.486,
"step": 636000
},
{
"epoch": 2.42,
"learning_rate": 9.646333236966367e-06,
"loss": 7.4864,
"step": 636500
},
{
"epoch": 2.42,
"learning_rate": 9.614633577293914e-06,
"loss": 7.4859,
"step": 637000
},
{
"epoch": 2.43,
"learning_rate": 9.58293391762146e-06,
"loss": 7.4927,
"step": 637500
},
{
"epoch": 2.43,
"learning_rate": 9.551234257949007e-06,
"loss": 7.4718,
"step": 638000
},
{
"epoch": 2.43,
"learning_rate": 9.519534598276552e-06,
"loss": 7.4904,
"step": 638500
},
{
"epoch": 2.43,
"learning_rate": 9.4878349386041e-06,
"loss": 7.4748,
"step": 639000
},
{
"epoch": 2.43,
"learning_rate": 9.456135278931646e-06,
"loss": 7.5012,
"step": 639500
},
{
"epoch": 2.43,
"learning_rate": 9.424435619259193e-06,
"loss": 7.4837,
"step": 640000
},
{
"epoch": 2.44,
"learning_rate": 9.39273595958674e-06,
"loss": 7.4662,
"step": 640500
},
{
"epoch": 2.44,
"learning_rate": 9.361036299914284e-06,
"loss": 7.4819,
"step": 641000
},
{
"epoch": 2.44,
"learning_rate": 9.329336640241831e-06,
"loss": 7.4823,
"step": 641500
},
{
"epoch": 2.44,
"learning_rate": 9.297636980569378e-06,
"loss": 7.4812,
"step": 642000
},
{
"epoch": 2.44,
"learning_rate": 9.265937320896924e-06,
"loss": 7.4908,
"step": 642500
},
{
"epoch": 2.45,
"learning_rate": 9.23423766122447e-06,
"loss": 7.487,
"step": 643000
},
{
"epoch": 2.45,
"learning_rate": 9.202538001552016e-06,
"loss": 7.4879,
"step": 643500
},
{
"epoch": 2.45,
"learning_rate": 9.170838341879561e-06,
"loss": 7.4821,
"step": 644000
},
{
"epoch": 2.45,
"learning_rate": 9.13913868220711e-06,
"loss": 7.5029,
"step": 644500
},
{
"epoch": 2.45,
"learning_rate": 9.107439022534655e-06,
"loss": 7.4918,
"step": 645000
},
{
"epoch": 2.46,
"learning_rate": 9.075739362862201e-06,
"loss": 7.4751,
"step": 645500
},
{
"epoch": 2.46,
"learning_rate": 9.044039703189746e-06,
"loss": 7.4986,
"step": 646000
},
{
"epoch": 2.46,
"learning_rate": 9.012340043517293e-06,
"loss": 7.495,
"step": 646500
},
{
"epoch": 2.46,
"learning_rate": 8.98064038384484e-06,
"loss": 7.4812,
"step": 647000
},
{
"epoch": 2.46,
"learning_rate": 8.948940724172386e-06,
"loss": 7.489,
"step": 647500
},
{
"epoch": 2.46,
"learning_rate": 8.917241064499931e-06,
"loss": 7.4895,
"step": 648000
},
{
"epoch": 2.47,
"learning_rate": 8.885541404827478e-06,
"loss": 7.4865,
"step": 648500
},
{
"epoch": 2.47,
"learning_rate": 8.853841745155025e-06,
"loss": 7.4895,
"step": 649000
},
{
"epoch": 2.47,
"learning_rate": 8.822142085482571e-06,
"loss": 7.4767,
"step": 649500
},
{
"epoch": 2.47,
"learning_rate": 8.790442425810116e-06,
"loss": 7.4838,
"step": 650000
},
{
"epoch": 2.47,
"learning_rate": 8.758742766137663e-06,
"loss": 7.4996,
"step": 650500
},
{
"epoch": 2.48,
"learning_rate": 8.727043106465208e-06,
"loss": 7.4857,
"step": 651000
},
{
"epoch": 2.48,
"learning_rate": 8.695343446792757e-06,
"loss": 7.4907,
"step": 651500
},
{
"epoch": 2.48,
"learning_rate": 8.663643787120302e-06,
"loss": 7.4794,
"step": 652000
},
{
"epoch": 2.48,
"learning_rate": 8.631944127447848e-06,
"loss": 7.4902,
"step": 652500
},
{
"epoch": 2.48,
"learning_rate": 8.600244467775393e-06,
"loss": 7.4896,
"step": 653000
},
{
"epoch": 2.49,
"learning_rate": 8.56854480810294e-06,
"loss": 7.482,
"step": 653500
},
{
"epoch": 2.49,
"learning_rate": 8.536845148430487e-06,
"loss": 7.4846,
"step": 654000
},
{
"epoch": 2.49,
"learning_rate": 8.505145488758033e-06,
"loss": 7.4953,
"step": 654500
},
{
"epoch": 2.49,
"learning_rate": 8.473445829085578e-06,
"loss": 7.4905,
"step": 655000
},
{
"epoch": 2.49,
"learning_rate": 8.441746169413125e-06,
"loss": 7.4971,
"step": 655500
},
{
"epoch": 2.5,
"learning_rate": 8.410046509740672e-06,
"loss": 7.4926,
"step": 656000
},
{
"epoch": 2.5,
"learning_rate": 8.378346850068219e-06,
"loss": 7.4864,
"step": 656500
},
{
"epoch": 2.5,
"learning_rate": 8.346647190395765e-06,
"loss": 7.4681,
"step": 657000
},
{
"epoch": 2.5,
"learning_rate": 8.31494753072331e-06,
"loss": 7.4832,
"step": 657500
},
{
"epoch": 2.5,
"learning_rate": 8.283247871050857e-06,
"loss": 7.4953,
"step": 658000
},
{
"epoch": 2.5,
"learning_rate": 8.251548211378404e-06,
"loss": 7.4854,
"step": 658500
},
{
"epoch": 2.51,
"learning_rate": 8.21984855170595e-06,
"loss": 7.4948,
"step": 659000
},
{
"epoch": 2.51,
"learning_rate": 8.188148892033495e-06,
"loss": 7.4983,
"step": 659500
},
{
"epoch": 2.51,
"learning_rate": 8.156449232361042e-06,
"loss": 7.4777,
"step": 660000
},
{
"epoch": 2.51,
"learning_rate": 8.124749572688587e-06,
"loss": 7.4798,
"step": 660500
},
{
"epoch": 2.51,
"learning_rate": 8.093049913016136e-06,
"loss": 7.4792,
"step": 661000
},
{
"epoch": 2.52,
"learning_rate": 8.06135025334368e-06,
"loss": 7.4759,
"step": 661500
},
{
"epoch": 2.52,
"learning_rate": 8.029650593671227e-06,
"loss": 7.4847,
"step": 662000
},
{
"epoch": 2.52,
"learning_rate": 7.997950933998772e-06,
"loss": 7.4837,
"step": 662500
},
{
"epoch": 2.52,
"learning_rate": 7.966251274326319e-06,
"loss": 7.4778,
"step": 663000
},
{
"epoch": 2.52,
"learning_rate": 7.934551614653866e-06,
"loss": 7.4882,
"step": 663500
},
{
"epoch": 2.53,
"learning_rate": 7.902851954981412e-06,
"loss": 7.4778,
"step": 664000
},
{
"epoch": 2.53,
"learning_rate": 7.871152295308957e-06,
"loss": 7.4866,
"step": 664500
},
{
"epoch": 2.53,
"learning_rate": 7.839452635636504e-06,
"loss": 7.4954,
"step": 665000
},
{
"epoch": 2.53,
"learning_rate": 7.80775297596405e-06,
"loss": 7.4884,
"step": 665500
},
{
"epoch": 2.53,
"learning_rate": 7.776053316291597e-06,
"loss": 7.4751,
"step": 666000
},
{
"epoch": 2.54,
"learning_rate": 7.744353656619142e-06,
"loss": 7.4712,
"step": 666500
},
{
"epoch": 2.54,
"learning_rate": 7.71265399694669e-06,
"loss": 7.4923,
"step": 667000
},
{
"epoch": 2.54,
"learning_rate": 7.680954337274234e-06,
"loss": 7.4964,
"step": 667500
},
{
"epoch": 2.54,
"learning_rate": 7.649254677601783e-06,
"loss": 7.499,
"step": 668000
},
{
"epoch": 2.54,
"learning_rate": 7.617555017929328e-06,
"loss": 7.4754,
"step": 668500
},
{
"epoch": 2.54,
"learning_rate": 7.585855358256874e-06,
"loss": 7.4836,
"step": 669000
},
{
"epoch": 2.55,
"learning_rate": 7.554155698584419e-06,
"loss": 7.488,
"step": 669500
},
{
"epoch": 2.55,
"learning_rate": 7.522456038911967e-06,
"loss": 7.4923,
"step": 670000
},
{
"epoch": 2.55,
"learning_rate": 7.490756379239512e-06,
"loss": 7.4793,
"step": 670500
},
{
"epoch": 2.55,
"learning_rate": 7.4590567195670594e-06,
"loss": 7.4939,
"step": 671000
},
{
"epoch": 2.55,
"learning_rate": 7.427357059894606e-06,
"loss": 7.487,
"step": 671500
},
{
"epoch": 2.56,
"learning_rate": 7.395657400222151e-06,
"loss": 7.4868,
"step": 672000
},
{
"epoch": 2.56,
"learning_rate": 7.363957740549699e-06,
"loss": 7.4842,
"step": 672500
},
{
"epoch": 2.56,
"learning_rate": 7.332258080877244e-06,
"loss": 7.4822,
"step": 673000
},
{
"epoch": 2.56,
"learning_rate": 7.300558421204791e-06,
"loss": 7.4852,
"step": 673500
},
{
"epoch": 2.56,
"learning_rate": 7.268858761532336e-06,
"loss": 7.4891,
"step": 674000
},
{
"epoch": 2.57,
"learning_rate": 7.237159101859883e-06,
"loss": 7.4841,
"step": 674500
},
{
"epoch": 2.57,
"learning_rate": 7.205459442187429e-06,
"loss": 7.4945,
"step": 675000
},
{
"epoch": 2.57,
"learning_rate": 7.1737597825149755e-06,
"loss": 7.4866,
"step": 675500
},
{
"epoch": 2.57,
"learning_rate": 7.142060122842521e-06,
"loss": 7.488,
"step": 676000
},
{
"epoch": 2.57,
"learning_rate": 7.110360463170068e-06,
"loss": 7.4688,
"step": 676500
},
{
"epoch": 2.58,
"learning_rate": 7.078660803497614e-06,
"loss": 7.4729,
"step": 677000
},
{
"epoch": 2.58,
"learning_rate": 7.046961143825161e-06,
"loss": 7.479,
"step": 677500
},
{
"epoch": 2.58,
"learning_rate": 7.0152614841527065e-06,
"loss": 7.4898,
"step": 678000
},
{
"epoch": 2.58,
"learning_rate": 6.983561824480253e-06,
"loss": 7.5005,
"step": 678500
},
{
"epoch": 2.58,
"learning_rate": 6.951862164807798e-06,
"loss": 7.4854,
"step": 679000
},
{
"epoch": 2.58,
"learning_rate": 6.920162505135346e-06,
"loss": 7.4771,
"step": 679500
},
{
"epoch": 2.59,
"learning_rate": 6.888462845462891e-06,
"loss": 7.4996,
"step": 680000
},
{
"epoch": 2.59,
"learning_rate": 6.856763185790438e-06,
"loss": 7.5023,
"step": 680500
},
{
"epoch": 2.59,
"learning_rate": 6.825063526117983e-06,
"loss": 7.4873,
"step": 681000
},
{
"epoch": 2.59,
"learning_rate": 6.79336386644553e-06,
"loss": 7.4781,
"step": 681500
},
{
"epoch": 2.59,
"learning_rate": 6.761664206773076e-06,
"loss": 7.4824,
"step": 682000
},
{
"epoch": 2.6,
"learning_rate": 6.729964547100623e-06,
"loss": 7.4792,
"step": 682500
},
{
"epoch": 2.6,
"learning_rate": 6.6982648874281685e-06,
"loss": 7.4833,
"step": 683000
},
{
"epoch": 2.6,
"learning_rate": 6.666565227755715e-06,
"loss": 7.4883,
"step": 683500
},
{
"epoch": 2.6,
"learning_rate": 6.634865568083261e-06,
"loss": 7.4876,
"step": 684000
},
{
"epoch": 2.6,
"learning_rate": 6.603165908410808e-06,
"loss": 7.4966,
"step": 684500
},
{
"epoch": 2.61,
"learning_rate": 6.571466248738354e-06,
"loss": 7.494,
"step": 685000
},
{
"epoch": 2.61,
"learning_rate": 6.5397665890659e-06,
"loss": 7.4769,
"step": 685500
},
{
"epoch": 2.61,
"learning_rate": 6.508066929393445e-06,
"loss": 7.4921,
"step": 686000
},
{
"epoch": 2.61,
"learning_rate": 6.476367269720993e-06,
"loss": 7.4946,
"step": 686500
},
{
"epoch": 2.61,
"learning_rate": 6.444667610048538e-06,
"loss": 7.4877,
"step": 687000
},
{
"epoch": 2.62,
"learning_rate": 6.4129679503760854e-06,
"loss": 7.4814,
"step": 687500
},
{
"epoch": 2.62,
"learning_rate": 6.381268290703632e-06,
"loss": 7.4903,
"step": 688000
},
{
"epoch": 2.62,
"learning_rate": 6.349568631031177e-06,
"loss": 7.4983,
"step": 688500
},
{
"epoch": 2.62,
"learning_rate": 6.317868971358725e-06,
"loss": 7.4839,
"step": 689000
},
{
"epoch": 2.62,
"learning_rate": 6.28616931168627e-06,
"loss": 7.5113,
"step": 689500
},
{
"epoch": 2.62,
"learning_rate": 6.254469652013817e-06,
"loss": 7.4889,
"step": 690000
},
{
"epoch": 2.63,
"learning_rate": 6.222769992341362e-06,
"loss": 7.4904,
"step": 690500
},
{
"epoch": 2.63,
"learning_rate": 6.191070332668909e-06,
"loss": 7.4978,
"step": 691000
},
{
"epoch": 2.63,
"learning_rate": 6.159370672996455e-06,
"loss": 7.4832,
"step": 691500
},
{
"epoch": 2.63,
"learning_rate": 6.127671013324001e-06,
"loss": 7.485,
"step": 692000
},
{
"epoch": 2.63,
"learning_rate": 6.095971353651547e-06,
"loss": 7.4716,
"step": 692500
},
{
"epoch": 2.64,
"learning_rate": 6.064271693979093e-06,
"loss": 7.5012,
"step": 693000
},
{
"epoch": 2.64,
"learning_rate": 6.03257203430664e-06,
"loss": 7.4798,
"step": 693500
},
{
"epoch": 2.64,
"learning_rate": 6.000872374634186e-06,
"loss": 7.4806,
"step": 694000
},
{
"epoch": 2.64,
"learning_rate": 5.9691727149617325e-06,
"loss": 7.486,
"step": 694500
},
{
"epoch": 2.64,
"learning_rate": 5.937473055289278e-06,
"loss": 7.4836,
"step": 695000
},
{
"epoch": 2.65,
"learning_rate": 5.905773395616825e-06,
"loss": 7.4876,
"step": 695500
},
{
"epoch": 2.65,
"learning_rate": 5.874073735944372e-06,
"loss": 7.468,
"step": 696000
},
{
"epoch": 2.65,
"learning_rate": 5.842374076271918e-06,
"loss": 7.4871,
"step": 696500
},
{
"epoch": 2.65,
"learning_rate": 5.810674416599464e-06,
"loss": 7.493,
"step": 697000
},
{
"epoch": 2.65,
"learning_rate": 5.77897475692701e-06,
"loss": 7.4734,
"step": 697500
},
{
"epoch": 2.66,
"learning_rate": 5.747275097254556e-06,
"loss": 7.4704,
"step": 698000
},
{
"epoch": 2.66,
"learning_rate": 5.715575437582103e-06,
"loss": 7.4867,
"step": 698500
},
{
"epoch": 2.66,
"learning_rate": 5.683875777909649e-06,
"loss": 7.4739,
"step": 699000
},
{
"epoch": 2.66,
"learning_rate": 5.652176118237195e-06,
"loss": 7.4763,
"step": 699500
},
{
"epoch": 2.66,
"learning_rate": 5.620476458564741e-06,
"loss": 7.483,
"step": 700000
},
{
"epoch": 2.66,
"learning_rate": 5.588776798892288e-06,
"loss": 7.4921,
"step": 700500
},
{
"epoch": 2.67,
"learning_rate": 5.557077139219834e-06,
"loss": 7.4833,
"step": 701000
},
{
"epoch": 2.67,
"learning_rate": 5.52537747954738e-06,
"loss": 7.497,
"step": 701500
},
{
"epoch": 2.67,
"learning_rate": 5.493677819874926e-06,
"loss": 7.4893,
"step": 702000
},
{
"epoch": 2.67,
"learning_rate": 5.461978160202472e-06,
"loss": 7.4757,
"step": 702500
},
{
"epoch": 2.67,
"learning_rate": 5.430278500530019e-06,
"loss": 7.4707,
"step": 703000
},
{
"epoch": 2.68,
"learning_rate": 5.398578840857565e-06,
"loss": 7.4749,
"step": 703500
},
{
"epoch": 2.68,
"learning_rate": 5.366879181185111e-06,
"loss": 7.4802,
"step": 704000
},
{
"epoch": 2.68,
"learning_rate": 5.335179521512657e-06,
"loss": 7.4991,
"step": 704500
},
{
"epoch": 2.68,
"learning_rate": 5.303479861840203e-06,
"loss": 7.4917,
"step": 705000
},
{
"epoch": 2.68,
"learning_rate": 5.27178020216775e-06,
"loss": 7.4783,
"step": 705500
},
{
"epoch": 2.69,
"learning_rate": 5.240080542495296e-06,
"loss": 7.4963,
"step": 706000
},
{
"epoch": 2.69,
"learning_rate": 5.208380882822842e-06,
"loss": 7.4929,
"step": 706500
},
{
"epoch": 2.69,
"learning_rate": 5.176681223150388e-06,
"loss": 7.4678,
"step": 707000
},
{
"epoch": 2.69,
"learning_rate": 5.144981563477935e-06,
"loss": 7.5026,
"step": 707500
},
{
"epoch": 2.69,
"learning_rate": 5.113281903805481e-06,
"loss": 7.4733,
"step": 708000
},
{
"epoch": 2.7,
"learning_rate": 5.081582244133027e-06,
"loss": 7.4956,
"step": 708500
},
{
"epoch": 2.7,
"learning_rate": 5.049882584460573e-06,
"loss": 7.4927,
"step": 709000
},
{
"epoch": 2.7,
"learning_rate": 5.018182924788119e-06,
"loss": 7.4927,
"step": 709500
},
{
"epoch": 2.7,
"learning_rate": 4.986483265115666e-06,
"loss": 7.4829,
"step": 710000
},
{
"epoch": 2.7,
"learning_rate": 4.954783605443212e-06,
"loss": 7.4863,
"step": 710500
},
{
"epoch": 2.7,
"learning_rate": 4.9230839457707585e-06,
"loss": 7.4888,
"step": 711000
},
{
"epoch": 2.71,
"learning_rate": 4.891384286098305e-06,
"loss": 7.4863,
"step": 711500
},
{
"epoch": 2.71,
"learning_rate": 4.859684626425851e-06,
"loss": 7.4832,
"step": 712000
},
{
"epoch": 2.71,
"learning_rate": 4.827984966753398e-06,
"loss": 7.4808,
"step": 712500
},
{
"epoch": 2.71,
"learning_rate": 4.796285307080944e-06,
"loss": 7.4834,
"step": 713000
},
{
"epoch": 2.71,
"learning_rate": 4.7645856474084895e-06,
"loss": 7.4914,
"step": 713500
},
{
"epoch": 2.72,
"learning_rate": 4.732885987736036e-06,
"loss": 7.4729,
"step": 714000
},
{
"epoch": 2.72,
"learning_rate": 4.701186328063582e-06,
"loss": 7.4726,
"step": 714500
},
{
"epoch": 2.72,
"learning_rate": 4.669486668391129e-06,
"loss": 7.4795,
"step": 715000
},
{
"epoch": 2.72,
"learning_rate": 4.637787008718675e-06,
"loss": 7.4856,
"step": 715500
},
{
"epoch": 2.72,
"learning_rate": 4.606087349046221e-06,
"loss": 7.4836,
"step": 716000
},
{
"epoch": 2.73,
"learning_rate": 4.574387689373767e-06,
"loss": 7.4901,
"step": 716500
},
{
"epoch": 2.73,
"learning_rate": 4.542688029701313e-06,
"loss": 7.4942,
"step": 717000
},
{
"epoch": 2.73,
"learning_rate": 4.51098837002886e-06,
"loss": 7.4943,
"step": 717500
},
{
"epoch": 2.73,
"learning_rate": 4.479288710356406e-06,
"loss": 7.4813,
"step": 718000
},
{
"epoch": 2.73,
"learning_rate": 4.447589050683952e-06,
"loss": 7.4915,
"step": 718500
},
{
"epoch": 2.74,
"learning_rate": 4.415889391011498e-06,
"loss": 7.4744,
"step": 719000
},
{
"epoch": 2.74,
"learning_rate": 4.384189731339045e-06,
"loss": 7.4867,
"step": 719500
},
{
"epoch": 2.74,
"learning_rate": 4.352490071666591e-06,
"loss": 7.4755,
"step": 720000
},
{
"epoch": 2.74,
"learning_rate": 4.3207904119941366e-06,
"loss": 7.4858,
"step": 720500
},
{
"epoch": 2.74,
"learning_rate": 4.289090752321683e-06,
"loss": 7.4846,
"step": 721000
},
{
"epoch": 2.74,
"learning_rate": 4.257391092649229e-06,
"loss": 7.4888,
"step": 721500
},
{
"epoch": 2.75,
"learning_rate": 4.225691432976776e-06,
"loss": 7.4856,
"step": 722000
},
{
"epoch": 2.75,
"learning_rate": 4.193991773304322e-06,
"loss": 7.5096,
"step": 722500
},
{
"epoch": 2.75,
"learning_rate": 4.162292113631868e-06,
"loss": 7.4782,
"step": 723000
},
{
"epoch": 2.75,
"learning_rate": 4.130592453959414e-06,
"loss": 7.4777,
"step": 723500
},
{
"epoch": 2.75,
"learning_rate": 4.09889279428696e-06,
"loss": 7.4838,
"step": 724000
},
{
"epoch": 2.76,
"learning_rate": 4.067193134614507e-06,
"loss": 7.4883,
"step": 724500
},
{
"epoch": 2.76,
"learning_rate": 4.035493474942053e-06,
"loss": 7.4824,
"step": 725000
},
{
"epoch": 2.76,
"learning_rate": 4.003793815269599e-06,
"loss": 7.4915,
"step": 725500
},
{
"epoch": 2.76,
"learning_rate": 3.972094155597145e-06,
"loss": 7.4854,
"step": 726000
},
{
"epoch": 2.76,
"learning_rate": 3.940394495924692e-06,
"loss": 7.4829,
"step": 726500
},
{
"epoch": 2.77,
"learning_rate": 3.908694836252238e-06,
"loss": 7.4947,
"step": 727000
},
{
"epoch": 2.77,
"learning_rate": 3.8769951765797845e-06,
"loss": 7.49,
"step": 727500
},
{
"epoch": 2.77,
"learning_rate": 3.845295516907331e-06,
"loss": 7.4696,
"step": 728000
},
{
"epoch": 2.77,
"learning_rate": 3.813595857234877e-06,
"loss": 7.4842,
"step": 728500
},
{
"epoch": 2.77,
"learning_rate": 3.7818961975624233e-06,
"loss": 7.5,
"step": 729000
},
{
"epoch": 2.77,
"learning_rate": 3.7501965378899696e-06,
"loss": 7.4932,
"step": 729500
},
{
"epoch": 2.78,
"learning_rate": 3.718496878217516e-06,
"loss": 7.4867,
"step": 730000
},
{
"epoch": 2.78,
"learning_rate": 3.686797218545062e-06,
"loss": 7.4942,
"step": 730500
},
{
"epoch": 2.78,
"learning_rate": 3.6550975588726085e-06,
"loss": 7.4817,
"step": 731000
},
{
"epoch": 2.78,
"learning_rate": 3.6233978992001543e-06,
"loss": 7.4858,
"step": 731500
},
{
"epoch": 2.78,
"learning_rate": 3.5916982395277006e-06,
"loss": 7.484,
"step": 732000
},
{
"epoch": 2.79,
"learning_rate": 3.559998579855247e-06,
"loss": 7.4813,
"step": 732500
},
{
"epoch": 2.79,
"learning_rate": 3.528298920182793e-06,
"loss": 7.4867,
"step": 733000
},
{
"epoch": 2.79,
"learning_rate": 3.4965992605103394e-06,
"loss": 7.5001,
"step": 733500
},
{
"epoch": 2.79,
"learning_rate": 3.4648996008378857e-06,
"loss": 7.4765,
"step": 734000
},
{
"epoch": 2.79,
"learning_rate": 3.433199941165432e-06,
"loss": 7.4821,
"step": 734500
},
{
"epoch": 2.8,
"learning_rate": 3.401500281492978e-06,
"loss": 7.4928,
"step": 735000
},
{
"epoch": 2.8,
"learning_rate": 3.369800621820524e-06,
"loss": 7.4892,
"step": 735500
},
{
"epoch": 2.8,
"learning_rate": 3.3381009621480704e-06,
"loss": 7.492,
"step": 736000
},
{
"epoch": 2.8,
"learning_rate": 3.3064013024756167e-06,
"loss": 7.502,
"step": 736500
},
{
"epoch": 2.8,
"learning_rate": 3.274701642803163e-06,
"loss": 7.4886,
"step": 737000
},
{
"epoch": 2.81,
"learning_rate": 3.2430019831307093e-06,
"loss": 7.478,
"step": 737500
},
{
"epoch": 2.81,
"learning_rate": 3.2113023234582555e-06,
"loss": 7.4988,
"step": 738000
},
{
"epoch": 2.81,
"learning_rate": 3.1796026637858014e-06,
"loss": 7.5066,
"step": 738500
},
{
"epoch": 2.81,
"learning_rate": 3.1479030041133477e-06,
"loss": 7.4705,
"step": 739000
},
{
"epoch": 2.81,
"learning_rate": 3.1162033444408944e-06,
"loss": 7.4903,
"step": 739500
},
{
"epoch": 2.81,
"learning_rate": 3.0845036847684407e-06,
"loss": 7.4835,
"step": 740000
},
{
"epoch": 2.82,
"learning_rate": 3.052804025095987e-06,
"loss": 7.5007,
"step": 740500
},
{
"epoch": 2.82,
"learning_rate": 3.0211043654235332e-06,
"loss": 7.4836,
"step": 741000
},
{
"epoch": 2.82,
"learning_rate": 2.989404705751079e-06,
"loss": 7.4821,
"step": 741500
},
{
"epoch": 2.82,
"learning_rate": 2.9577050460786254e-06,
"loss": 7.4831,
"step": 742000
},
{
"epoch": 2.82,
"learning_rate": 2.9260053864061716e-06,
"loss": 7.4771,
"step": 742500
},
{
"epoch": 2.83,
"learning_rate": 2.894305726733718e-06,
"loss": 7.4929,
"step": 743000
},
{
"epoch": 2.83,
"learning_rate": 2.862606067061264e-06,
"loss": 7.4817,
"step": 743500
},
{
"epoch": 2.83,
"learning_rate": 2.8309064073888105e-06,
"loss": 7.4738,
"step": 744000
},
{
"epoch": 2.83,
"learning_rate": 2.7992067477163568e-06,
"loss": 7.4834,
"step": 744500
},
{
"epoch": 2.83,
"learning_rate": 2.7675070880439026e-06,
"loss": 7.4874,
"step": 745000
},
{
"epoch": 2.84,
"learning_rate": 2.735807428371449e-06,
"loss": 7.4922,
"step": 745500
},
{
"epoch": 2.84,
"learning_rate": 2.704107768698995e-06,
"loss": 7.4841,
"step": 746000
},
{
"epoch": 2.84,
"learning_rate": 2.6724081090265415e-06,
"loss": 7.4811,
"step": 746500
},
{
"epoch": 2.84,
"learning_rate": 2.6407084493540877e-06,
"loss": 7.5024,
"step": 747000
},
{
"epoch": 2.84,
"learning_rate": 2.6090087896816345e-06,
"loss": 7.4905,
"step": 747500
},
{
"epoch": 2.85,
"learning_rate": 2.5773091300091803e-06,
"loss": 7.4966,
"step": 748000
},
{
"epoch": 2.85,
"learning_rate": 2.5456094703367266e-06,
"loss": 7.4824,
"step": 748500
},
{
"epoch": 2.85,
"learning_rate": 2.513909810664273e-06,
"loss": 7.4926,
"step": 749000
},
{
"epoch": 2.85,
"learning_rate": 2.482210150991819e-06,
"loss": 7.5065,
"step": 749500
},
{
"epoch": 2.85,
"learning_rate": 2.4505104913193654e-06,
"loss": 7.489,
"step": 750000
},
{
"epoch": 2.85,
"learning_rate": 2.4188108316469117e-06,
"loss": 7.4916,
"step": 750500
},
{
"epoch": 2.86,
"learning_rate": 2.387111171974458e-06,
"loss": 7.4795,
"step": 751000
},
{
"epoch": 2.86,
"learning_rate": 2.355411512302004e-06,
"loss": 7.4899,
"step": 751500
},
{
"epoch": 2.86,
"learning_rate": 2.32371185262955e-06,
"loss": 7.4849,
"step": 752000
},
{
"epoch": 2.86,
"learning_rate": 2.2920121929570964e-06,
"loss": 7.4987,
"step": 752500
},
{
"epoch": 2.86,
"learning_rate": 2.2603125332846427e-06,
"loss": 7.492,
"step": 753000
},
{
"epoch": 2.87,
"learning_rate": 2.228612873612189e-06,
"loss": 7.4875,
"step": 753500
},
{
"epoch": 2.87,
"learning_rate": 2.1969132139397353e-06,
"loss": 7.4801,
"step": 754000
},
{
"epoch": 2.87,
"learning_rate": 2.1652135542672815e-06,
"loss": 7.4815,
"step": 754500
},
{
"epoch": 2.87,
"learning_rate": 2.1335138945948274e-06,
"loss": 7.4784,
"step": 755000
},
{
"epoch": 2.87,
"learning_rate": 2.101814234922374e-06,
"loss": 7.49,
"step": 755500
},
{
"epoch": 2.88,
"learning_rate": 2.0701145752499204e-06,
"loss": 7.4881,
"step": 756000
},
{
"epoch": 2.88,
"learning_rate": 2.0384149155774667e-06,
"loss": 7.5105,
"step": 756500
},
{
"epoch": 2.88,
"learning_rate": 2.006715255905013e-06,
"loss": 7.474,
"step": 757000
},
{
"epoch": 2.88,
"learning_rate": 1.9750155962325592e-06,
"loss": 7.4827,
"step": 757500
},
{
"epoch": 2.88,
"learning_rate": 1.943315936560105e-06,
"loss": 7.4983,
"step": 758000
},
{
"epoch": 2.89,
"learning_rate": 1.9116162768876514e-06,
"loss": 7.4819,
"step": 758500
},
{
"epoch": 2.89,
"learning_rate": 1.8799166172151976e-06,
"loss": 7.4906,
"step": 759000
},
{
"epoch": 2.89,
"learning_rate": 1.848216957542744e-06,
"loss": 7.4869,
"step": 759500
},
{
"epoch": 2.89,
"learning_rate": 1.8165172978702902e-06,
"loss": 7.5035,
"step": 760000
},
{
"epoch": 2.89,
"learning_rate": 1.7848176381978363e-06,
"loss": 7.4905,
"step": 760500
},
{
"epoch": 2.89,
"learning_rate": 1.7531179785253826e-06,
"loss": 7.5053,
"step": 761000
},
{
"epoch": 2.9,
"learning_rate": 1.7214183188529288e-06,
"loss": 7.4814,
"step": 761500
},
{
"epoch": 2.9,
"learning_rate": 1.689718659180475e-06,
"loss": 7.4942,
"step": 762000
},
{
"epoch": 2.9,
"learning_rate": 1.6580189995080212e-06,
"loss": 7.4941,
"step": 762500
},
{
"epoch": 2.9,
"learning_rate": 1.6263193398355675e-06,
"loss": 7.4806,
"step": 763000
},
{
"epoch": 2.9,
"learning_rate": 1.594619680163114e-06,
"loss": 7.488,
"step": 763500
},
{
"epoch": 2.91,
"learning_rate": 1.5629200204906602e-06,
"loss": 7.4943,
"step": 764000
},
{
"epoch": 2.91,
"learning_rate": 1.5312203608182063e-06,
"loss": 7.4968,
"step": 764500
},
{
"epoch": 2.91,
"learning_rate": 1.4995207011457526e-06,
"loss": 7.4926,
"step": 765000
},
{
"epoch": 2.91,
"learning_rate": 1.4678210414732989e-06,
"loss": 7.4796,
"step": 765500
},
{
"epoch": 2.91,
"learning_rate": 1.4361213818008451e-06,
"loss": 7.4879,
"step": 766000
},
{
"epoch": 2.92,
"learning_rate": 1.4044217221283914e-06,
"loss": 7.4766,
"step": 766500
},
{
"epoch": 2.92,
"learning_rate": 1.3727220624559375e-06,
"loss": 7.4799,
"step": 767000
},
{
"epoch": 2.92,
"learning_rate": 1.3410224027834838e-06,
"loss": 7.4736,
"step": 767500
},
{
"epoch": 2.92,
"learning_rate": 1.30932274311103e-06,
"loss": 7.4757,
"step": 768000
},
{
"epoch": 2.92,
"learning_rate": 1.2776230834385761e-06,
"loss": 7.4907,
"step": 768500
},
{
"epoch": 2.93,
"learning_rate": 1.2459234237661224e-06,
"loss": 7.4913,
"step": 769000
},
{
"epoch": 2.93,
"learning_rate": 1.214223764093669e-06,
"loss": 7.4808,
"step": 769500
},
{
"epoch": 2.93,
"learning_rate": 1.182524104421215e-06,
"loss": 7.4841,
"step": 770000
},
{
"epoch": 2.93,
"learning_rate": 1.1508244447487612e-06,
"loss": 7.4796,
"step": 770500
},
{
"epoch": 2.93,
"learning_rate": 1.1191247850763075e-06,
"loss": 7.4991,
"step": 771000
},
{
"epoch": 2.93,
"learning_rate": 1.0874251254038536e-06,
"loss": 7.489,
"step": 771500
},
{
"epoch": 2.94,
"learning_rate": 1.0557254657313999e-06,
"loss": 7.4808,
"step": 772000
},
{
"epoch": 2.94,
"learning_rate": 1.0240258060589462e-06,
"loss": 7.4912,
"step": 772500
},
{
"epoch": 2.94,
"learning_rate": 9.923261463864924e-07,
"loss": 7.4826,
"step": 773000
},
{
"epoch": 2.94,
"learning_rate": 9.606264867140387e-07,
"loss": 7.4879,
"step": 773500
},
{
"epoch": 2.94,
"learning_rate": 9.28926827041585e-07,
"loss": 7.4882,
"step": 774000
},
{
"epoch": 2.95,
"learning_rate": 8.972271673691312e-07,
"loss": 7.5192,
"step": 774500
},
{
"epoch": 2.95,
"learning_rate": 8.655275076966775e-07,
"loss": 7.4925,
"step": 775000
},
{
"epoch": 2.95,
"learning_rate": 8.338278480242236e-07,
"loss": 7.4822,
"step": 775500
},
{
"epoch": 2.95,
"learning_rate": 8.021281883517698e-07,
"loss": 7.4826,
"step": 776000
},
{
"epoch": 2.95,
"learning_rate": 7.704285286793162e-07,
"loss": 7.5019,
"step": 776500
},
{
"epoch": 2.96,
"learning_rate": 7.387288690068624e-07,
"loss": 7.4893,
"step": 777000
},
{
"epoch": 2.96,
"learning_rate": 7.070292093344086e-07,
"loss": 7.4881,
"step": 777500
},
{
"epoch": 2.96,
"learning_rate": 6.753295496619548e-07,
"loss": 7.4814,
"step": 778000
},
{
"epoch": 2.96,
"learning_rate": 6.436298899895011e-07,
"loss": 7.4952,
"step": 778500
},
{
"epoch": 2.96,
"learning_rate": 6.119302303170474e-07,
"loss": 7.5016,
"step": 779000
},
{
"epoch": 2.97,
"learning_rate": 5.802305706445936e-07,
"loss": 7.4836,
"step": 779500
},
{
"epoch": 2.97,
"learning_rate": 5.485309109721397e-07,
"loss": 7.5027,
"step": 780000
},
{
"epoch": 2.97,
"learning_rate": 5.168312512996861e-07,
"loss": 7.5021,
"step": 780500
},
{
"epoch": 2.97,
"learning_rate": 4.851315916272323e-07,
"loss": 7.4949,
"step": 781000
},
{
"epoch": 2.97,
"learning_rate": 4.534319319547785e-07,
"loss": 7.4767,
"step": 781500
},
{
"epoch": 2.97,
"learning_rate": 4.2173227228232475e-07,
"loss": 7.4891,
"step": 782000
},
{
"epoch": 2.98,
"learning_rate": 3.9003261260987103e-07,
"loss": 7.4797,
"step": 782500
},
{
"epoch": 2.98,
"learning_rate": 3.5833295293741726e-07,
"loss": 7.502,
"step": 783000
},
{
"epoch": 2.98,
"learning_rate": 3.266332932649635e-07,
"loss": 7.4929,
"step": 783500
},
{
"epoch": 2.98,
"learning_rate": 2.9493363359250977e-07,
"loss": 7.4897,
"step": 784000
},
{
"epoch": 2.98,
"learning_rate": 2.63233973920056e-07,
"loss": 7.4833,
"step": 784500
},
{
"epoch": 2.99,
"learning_rate": 2.3153431424760225e-07,
"loss": 7.4747,
"step": 785000
},
{
"epoch": 2.99,
"learning_rate": 1.9983465457514848e-07,
"loss": 7.485,
"step": 785500
},
{
"epoch": 2.99,
"learning_rate": 1.6813499490269473e-07,
"loss": 7.4975,
"step": 786000
},
{
"epoch": 2.99,
"learning_rate": 1.36435335230241e-07,
"loss": 7.4765,
"step": 786500
},
{
"epoch": 2.99,
"learning_rate": 1.0473567555778721e-07,
"loss": 7.482,
"step": 787000
},
{
"epoch": 3.0,
"learning_rate": 7.303601588533346e-08,
"loss": 7.4931,
"step": 787500
},
{
"epoch": 3.0,
"learning_rate": 4.13363562128797e-08,
"loss": 7.4786,
"step": 788000
},
{
"epoch": 3.0,
"learning_rate": 9.636696540425943e-09,
"loss": 7.4738,
"step": 788500
},
{
"epoch": 3.0,
"step": 788652,
"total_flos": 1.3227171767511228e+19,
"train_loss": 7.491424576953926,
"train_runtime": 331955.1531,
"train_samples_per_second": 19.006,
"train_steps_per_second": 2.376
}
],
"max_steps": 788652,
"num_train_epochs": 3,
"total_flos": 1.3227171767511228e+19,
"trial_name": null,
"trial_params": null
}