{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 788652, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.996830034032755e-05, "loss": 7.8831, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9936600680655096e-05, "loss": 7.6494, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.990490102098264e-05, "loss": 7.6212, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.987320136131019e-05, "loss": 7.6134, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.984150170163773e-05, "loss": 7.5908, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.980980204196528e-05, "loss": 7.5855, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9778102382292826e-05, "loss": 7.5888, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.974640272262037e-05, "loss": 7.5772, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.971470306294792e-05, "loss": 7.5693, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.968300340327546e-05, "loss": 7.5633, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.965130374360301e-05, "loss": 7.567, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.961960408393056e-05, "loss": 7.5412, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.95879044242581e-05, "loss": 7.5584, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.955620476458565e-05, "loss": 7.5604, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.952450510491319e-05, "loss": 7.568, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.949280544524074e-05, "loss": 7.5403, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.946110578556829e-05, "loss": 7.5384, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.9429406125895836e-05, "loss": 7.552, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.939770646622338e-05, "loss": 7.553, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.936600680655093e-05, "loss": 7.5243, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.933430714687847e-05, "loss": 7.5329, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.930260748720602e-05, "loss": 7.5257, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.9270907827533566e-05, "loss": 7.5282, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.923920816786111e-05, "loss": 7.5224, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.920750850818866e-05, "loss": 7.5279, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.91758088485162e-05, "loss": 7.5301, "step": 13000 }, { "epoch": 0.05, "learning_rate": 4.914410918884375e-05, "loss": 7.5161, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.9112409529171297e-05, "loss": 7.5328, "step": 14000 }, { "epoch": 0.06, "learning_rate": 4.908070986949884e-05, "loss": 7.5159, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.904901020982639e-05, "loss": 7.505, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.901731055015393e-05, "loss": 7.5269, "step": 15500 }, { "epoch": 0.06, "learning_rate": 4.898561089048148e-05, "loss": 7.5221, "step": 16000 }, { "epoch": 0.06, "learning_rate": 4.895391123080903e-05, "loss": 7.5086, "step": 16500 }, { "epoch": 0.06, "learning_rate": 4.892221157113657e-05, "loss": 7.5176, "step": 17000 }, { "epoch": 0.07, "learning_rate": 4.889051191146412e-05, "loss": 7.5047, "step": 17500 }, { "epoch": 0.07, "learning_rate": 4.885881225179167e-05, "loss": 7.5244, "step": 18000 }, { "epoch": 0.07, "learning_rate": 4.8827112592119214e-05, "loss": 7.5216, "step": 18500 }, { "epoch": 0.07, "learning_rate": 4.879541293244676e-05, "loss": 7.5125, "step": 19000 }, { "epoch": 0.07, "learning_rate": 4.876371327277431e-05, "loss": 7.5166, "step": 19500 }, { "epoch": 0.08, "learning_rate": 4.873201361310185e-05, "loss": 7.5145, "step": 20000 }, { "epoch": 0.08, "learning_rate": 4.87003139534294e-05, "loss": 7.5154, "step": 20500 }, { "epoch": 0.08, "learning_rate": 4.8668614293756944e-05, "loss": 7.4982, "step": 21000 }, { "epoch": 0.08, "learning_rate": 4.8636914634084494e-05, "loss": 7.5113, "step": 21500 }, { "epoch": 0.08, "learning_rate": 4.860521497441204e-05, "loss": 7.5053, "step": 22000 }, { "epoch": 0.09, "learning_rate": 4.857351531473958e-05, "loss": 7.5311, "step": 22500 }, { "epoch": 0.09, "learning_rate": 4.854181565506713e-05, "loss": 7.4977, "step": 23000 }, { "epoch": 0.09, "learning_rate": 4.851011599539468e-05, "loss": 7.5142, "step": 23500 }, { "epoch": 0.09, "learning_rate": 4.8478416335722224e-05, "loss": 7.5061, "step": 24000 }, { "epoch": 0.09, "learning_rate": 4.844671667604977e-05, "loss": 7.519, "step": 24500 }, { "epoch": 0.1, "learning_rate": 4.841501701637731e-05, "loss": 7.5055, "step": 25000 }, { "epoch": 0.1, "learning_rate": 4.838331735670486e-05, "loss": 7.5135, "step": 25500 }, { "epoch": 0.1, "learning_rate": 4.835161769703241e-05, "loss": 7.5041, "step": 26000 }, { "epoch": 0.1, "learning_rate": 4.8319918037359954e-05, "loss": 7.5036, "step": 26500 }, { "epoch": 0.1, "learning_rate": 4.82882183776875e-05, "loss": 7.526, "step": 27000 }, { "epoch": 0.1, "learning_rate": 4.825651871801505e-05, "loss": 7.5069, "step": 27500 }, { "epoch": 0.11, "learning_rate": 4.822481905834259e-05, "loss": 7.4989, "step": 28000 }, { "epoch": 0.11, "learning_rate": 4.819311939867014e-05, "loss": 7.5217, "step": 28500 }, { "epoch": 0.11, "learning_rate": 4.8161419738997684e-05, "loss": 7.4926, "step": 29000 }, { "epoch": 0.11, "learning_rate": 4.812972007932523e-05, "loss": 7.5044, "step": 29500 }, { "epoch": 0.11, "learning_rate": 4.809802041965278e-05, "loss": 7.4951, "step": 30000 }, { "epoch": 0.12, "learning_rate": 4.806632075998032e-05, "loss": 7.5096, "step": 30500 }, { "epoch": 0.12, "learning_rate": 4.803462110030787e-05, "loss": 7.5154, "step": 31000 }, { "epoch": 0.12, "learning_rate": 4.800292144063542e-05, "loss": 7.5022, "step": 31500 }, { "epoch": 0.12, "learning_rate": 4.797122178096296e-05, "loss": 7.5204, "step": 32000 }, { "epoch": 0.12, "learning_rate": 4.793952212129051e-05, "loss": 7.5097, "step": 32500 }, { "epoch": 0.13, "learning_rate": 4.790782246161806e-05, "loss": 7.5172, "step": 33000 }, { "epoch": 0.13, "learning_rate": 4.78761228019456e-05, "loss": 7.4931, "step": 33500 }, { "epoch": 0.13, "learning_rate": 4.784442314227315e-05, "loss": 7.4993, "step": 34000 }, { "epoch": 0.13, "learning_rate": 4.781272348260069e-05, "loss": 7.4984, "step": 34500 }, { "epoch": 0.13, "learning_rate": 4.778102382292824e-05, "loss": 7.5244, "step": 35000 }, { "epoch": 0.14, "learning_rate": 4.774932416325579e-05, "loss": 7.504, "step": 35500 }, { "epoch": 0.14, "learning_rate": 4.771762450358333e-05, "loss": 7.5031, "step": 36000 }, { "epoch": 0.14, "learning_rate": 4.768592484391088e-05, "loss": 7.5034, "step": 36500 }, { "epoch": 0.14, "learning_rate": 4.7654225184238425e-05, "loss": 7.5217, "step": 37000 }, { "epoch": 0.14, "learning_rate": 4.762252552456597e-05, "loss": 7.5025, "step": 37500 }, { "epoch": 0.14, "learning_rate": 4.759082586489352e-05, "loss": 7.5059, "step": 38000 }, { "epoch": 0.15, "learning_rate": 4.755912620522106e-05, "loss": 7.5178, "step": 38500 }, { "epoch": 0.15, "learning_rate": 4.752742654554861e-05, "loss": 7.5004, "step": 39000 }, { "epoch": 0.15, "learning_rate": 4.7495726885876155e-05, "loss": 7.5094, "step": 39500 }, { "epoch": 0.15, "learning_rate": 4.74640272262037e-05, "loss": 7.4932, "step": 40000 }, { "epoch": 0.15, "learning_rate": 4.743232756653125e-05, "loss": 7.5062, "step": 40500 }, { "epoch": 0.16, "learning_rate": 4.74006279068588e-05, "loss": 7.5029, "step": 41000 }, { "epoch": 0.16, "learning_rate": 4.736892824718634e-05, "loss": 7.5028, "step": 41500 }, { "epoch": 0.16, "learning_rate": 4.7337228587513885e-05, "loss": 7.5218, "step": 42000 }, { "epoch": 0.16, "learning_rate": 4.730552892784143e-05, "loss": 7.4965, "step": 42500 }, { "epoch": 0.16, "learning_rate": 4.727382926816898e-05, "loss": 7.5068, "step": 43000 }, { "epoch": 0.17, "learning_rate": 4.724212960849653e-05, "loss": 7.5086, "step": 43500 }, { "epoch": 0.17, "learning_rate": 4.721042994882407e-05, "loss": 7.4934, "step": 44000 }, { "epoch": 0.17, "learning_rate": 4.7178730289151615e-05, "loss": 7.4922, "step": 44500 }, { "epoch": 0.17, "learning_rate": 4.7147030629479165e-05, "loss": 7.4961, "step": 45000 }, { "epoch": 0.17, "learning_rate": 4.711533096980671e-05, "loss": 7.5111, "step": 45500 }, { "epoch": 0.17, "learning_rate": 4.708363131013426e-05, "loss": 7.4964, "step": 46000 }, { "epoch": 0.18, "learning_rate": 4.70519316504618e-05, "loss": 7.5062, "step": 46500 }, { "epoch": 0.18, "learning_rate": 4.7020231990789345e-05, "loss": 7.4992, "step": 47000 }, { "epoch": 0.18, "learning_rate": 4.6988532331116895e-05, "loss": 7.5116, "step": 47500 }, { "epoch": 0.18, "learning_rate": 4.695683267144444e-05, "loss": 7.5012, "step": 48000 }, { "epoch": 0.18, "learning_rate": 4.692513301177199e-05, "loss": 7.4854, "step": 48500 }, { "epoch": 0.19, "learning_rate": 4.689343335209954e-05, "loss": 7.4943, "step": 49000 }, { "epoch": 0.19, "learning_rate": 4.686173369242708e-05, "loss": 7.498, "step": 49500 }, { "epoch": 0.19, "learning_rate": 4.6830034032754625e-05, "loss": 7.4868, "step": 50000 }, { "epoch": 0.19, "learning_rate": 4.6798334373082175e-05, "loss": 7.4862, "step": 50500 }, { "epoch": 0.19, "learning_rate": 4.676663471340972e-05, "loss": 7.4992, "step": 51000 }, { "epoch": 0.2, "learning_rate": 4.673493505373727e-05, "loss": 7.4996, "step": 51500 }, { "epoch": 0.2, "learning_rate": 4.670323539406481e-05, "loss": 7.499, "step": 52000 }, { "epoch": 0.2, "learning_rate": 4.6671535734392356e-05, "loss": 7.503, "step": 52500 }, { "epoch": 0.2, "learning_rate": 4.6639836074719906e-05, "loss": 7.488, "step": 53000 }, { "epoch": 0.2, "learning_rate": 4.660813641504745e-05, "loss": 7.4991, "step": 53500 }, { "epoch": 0.21, "learning_rate": 4.6576436755375e-05, "loss": 7.4968, "step": 54000 }, { "epoch": 0.21, "learning_rate": 4.654473709570254e-05, "loss": 7.5004, "step": 54500 }, { "epoch": 0.21, "learning_rate": 4.6513037436030086e-05, "loss": 7.494, "step": 55000 }, { "epoch": 0.21, "learning_rate": 4.6481337776357636e-05, "loss": 7.4935, "step": 55500 }, { "epoch": 0.21, "learning_rate": 4.644963811668518e-05, "loss": 7.5, "step": 56000 }, { "epoch": 0.21, "learning_rate": 4.641793845701273e-05, "loss": 7.4858, "step": 56500 }, { "epoch": 0.22, "learning_rate": 4.638623879734027e-05, "loss": 7.5052, "step": 57000 }, { "epoch": 0.22, "learning_rate": 4.6354539137667816e-05, "loss": 7.5023, "step": 57500 }, { "epoch": 0.22, "learning_rate": 4.6322839477995366e-05, "loss": 7.4919, "step": 58000 }, { "epoch": 0.22, "learning_rate": 4.6291139818322916e-05, "loss": 7.49, "step": 58500 }, { "epoch": 0.22, "learning_rate": 4.625944015865046e-05, "loss": 7.5039, "step": 59000 }, { "epoch": 0.23, "learning_rate": 4.622774049897801e-05, "loss": 7.4817, "step": 59500 }, { "epoch": 0.23, "learning_rate": 4.6196040839305546e-05, "loss": 7.4938, "step": 60000 }, { "epoch": 0.23, "learning_rate": 4.6164341179633096e-05, "loss": 7.496, "step": 60500 }, { "epoch": 0.23, "learning_rate": 4.6132641519960646e-05, "loss": 7.4985, "step": 61000 }, { "epoch": 0.23, "learning_rate": 4.610094186028819e-05, "loss": 7.4996, "step": 61500 }, { "epoch": 0.24, "learning_rate": 4.606924220061574e-05, "loss": 7.5, "step": 62000 }, { "epoch": 0.24, "learning_rate": 4.603754254094328e-05, "loss": 7.4978, "step": 62500 }, { "epoch": 0.24, "learning_rate": 4.6005842881270826e-05, "loss": 7.4842, "step": 63000 }, { "epoch": 0.24, "learning_rate": 4.5974143221598376e-05, "loss": 7.5213, "step": 63500 }, { "epoch": 0.24, "learning_rate": 4.5942443561925926e-05, "loss": 7.4911, "step": 64000 }, { "epoch": 0.25, "learning_rate": 4.591074390225347e-05, "loss": 7.4901, "step": 64500 }, { "epoch": 0.25, "learning_rate": 4.587904424258101e-05, "loss": 7.5091, "step": 65000 }, { "epoch": 0.25, "learning_rate": 4.5847344582908556e-05, "loss": 7.5016, "step": 65500 }, { "epoch": 0.25, "learning_rate": 4.5815644923236106e-05, "loss": 7.4955, "step": 66000 }, { "epoch": 0.25, "learning_rate": 4.5783945263563656e-05, "loss": 7.484, "step": 66500 }, { "epoch": 0.25, "learning_rate": 4.57522456038912e-05, "loss": 7.4943, "step": 67000 }, { "epoch": 0.26, "learning_rate": 4.572054594421874e-05, "loss": 7.4962, "step": 67500 }, { "epoch": 0.26, "learning_rate": 4.568884628454629e-05, "loss": 7.4952, "step": 68000 }, { "epoch": 0.26, "learning_rate": 4.5657146624873837e-05, "loss": 7.5072, "step": 68500 }, { "epoch": 0.26, "learning_rate": 4.5625446965201387e-05, "loss": 7.5096, "step": 69000 }, { "epoch": 0.26, "learning_rate": 4.559374730552893e-05, "loss": 7.49, "step": 69500 }, { "epoch": 0.27, "learning_rate": 4.556204764585647e-05, "loss": 7.4966, "step": 70000 }, { "epoch": 0.27, "learning_rate": 4.553034798618402e-05, "loss": 7.5115, "step": 70500 }, { "epoch": 0.27, "learning_rate": 4.549864832651157e-05, "loss": 7.503, "step": 71000 }, { "epoch": 0.27, "learning_rate": 4.546694866683912e-05, "loss": 7.4929, "step": 71500 }, { "epoch": 0.27, "learning_rate": 4.543524900716667e-05, "loss": 7.4934, "step": 72000 }, { "epoch": 0.28, "learning_rate": 4.54035493474942e-05, "loss": 7.4957, "step": 72500 }, { "epoch": 0.28, "learning_rate": 4.5371849687821753e-05, "loss": 7.4916, "step": 73000 }, { "epoch": 0.28, "learning_rate": 4.53401500281493e-05, "loss": 7.5026, "step": 73500 }, { "epoch": 0.28, "learning_rate": 4.530845036847685e-05, "loss": 7.5125, "step": 74000 }, { "epoch": 0.28, "learning_rate": 4.52767507088044e-05, "loss": 7.4996, "step": 74500 }, { "epoch": 0.29, "learning_rate": 4.5245051049131933e-05, "loss": 7.4954, "step": 75000 }, { "epoch": 0.29, "learning_rate": 4.5213351389459484e-05, "loss": 7.4997, "step": 75500 }, { "epoch": 0.29, "learning_rate": 4.5181651729787034e-05, "loss": 7.5005, "step": 76000 }, { "epoch": 0.29, "learning_rate": 4.514995207011458e-05, "loss": 7.4982, "step": 76500 }, { "epoch": 0.29, "learning_rate": 4.511825241044213e-05, "loss": 7.4946, "step": 77000 }, { "epoch": 0.29, "learning_rate": 4.508655275076967e-05, "loss": 7.4983, "step": 77500 }, { "epoch": 0.3, "learning_rate": 4.5054853091097214e-05, "loss": 7.5142, "step": 78000 }, { "epoch": 0.3, "learning_rate": 4.5023153431424764e-05, "loss": 7.4921, "step": 78500 }, { "epoch": 0.3, "learning_rate": 4.499145377175231e-05, "loss": 7.4931, "step": 79000 }, { "epoch": 0.3, "learning_rate": 4.495975411207986e-05, "loss": 7.5017, "step": 79500 }, { "epoch": 0.3, "learning_rate": 4.49280544524074e-05, "loss": 7.4859, "step": 80000 }, { "epoch": 0.31, "learning_rate": 4.4896354792734944e-05, "loss": 7.4899, "step": 80500 }, { "epoch": 0.31, "learning_rate": 4.4864655133062494e-05, "loss": 7.4942, "step": 81000 }, { "epoch": 0.31, "learning_rate": 4.4832955473390044e-05, "loss": 7.4778, "step": 81500 }, { "epoch": 0.31, "learning_rate": 4.480125581371759e-05, "loss": 7.5, "step": 82000 }, { "epoch": 0.31, "learning_rate": 4.476955615404513e-05, "loss": 7.5007, "step": 82500 }, { "epoch": 0.32, "learning_rate": 4.4737856494372674e-05, "loss": 7.4928, "step": 83000 }, { "epoch": 0.32, "learning_rate": 4.4706156834700224e-05, "loss": 7.4768, "step": 83500 }, { "epoch": 0.32, "learning_rate": 4.4674457175027774e-05, "loss": 7.4966, "step": 84000 }, { "epoch": 0.32, "learning_rate": 4.464275751535532e-05, "loss": 7.5056, "step": 84500 }, { "epoch": 0.32, "learning_rate": 4.461105785568286e-05, "loss": 7.4967, "step": 85000 }, { "epoch": 0.33, "learning_rate": 4.457935819601041e-05, "loss": 7.493, "step": 85500 }, { "epoch": 0.33, "learning_rate": 4.4547658536337954e-05, "loss": 7.5024, "step": 86000 }, { "epoch": 0.33, "learning_rate": 4.4515958876665504e-05, "loss": 7.4954, "step": 86500 }, { "epoch": 0.33, "learning_rate": 4.448425921699305e-05, "loss": 7.493, "step": 87000 }, { "epoch": 0.33, "learning_rate": 4.44525595573206e-05, "loss": 7.5053, "step": 87500 }, { "epoch": 0.33, "learning_rate": 4.442085989764814e-05, "loss": 7.4864, "step": 88000 }, { "epoch": 0.34, "learning_rate": 4.4389160237975684e-05, "loss": 7.4929, "step": 88500 }, { "epoch": 0.34, "learning_rate": 4.4357460578303234e-05, "loss": 7.4995, "step": 89000 }, { "epoch": 0.34, "learning_rate": 4.4325760918630785e-05, "loss": 7.4882, "step": 89500 }, { "epoch": 0.34, "learning_rate": 4.429406125895833e-05, "loss": 7.5016, "step": 90000 }, { "epoch": 0.34, "learning_rate": 4.426236159928587e-05, "loss": 7.4996, "step": 90500 }, { "epoch": 0.35, "learning_rate": 4.4230661939613414e-05, "loss": 7.486, "step": 91000 }, { "epoch": 0.35, "learning_rate": 4.4198962279940965e-05, "loss": 7.5026, "step": 91500 }, { "epoch": 0.35, "learning_rate": 4.4167262620268515e-05, "loss": 7.4921, "step": 92000 }, { "epoch": 0.35, "learning_rate": 4.413556296059606e-05, "loss": 7.5054, "step": 92500 }, { "epoch": 0.35, "learning_rate": 4.41038633009236e-05, "loss": 7.4965, "step": 93000 }, { "epoch": 0.36, "learning_rate": 4.407216364125115e-05, "loss": 7.4943, "step": 93500 }, { "epoch": 0.36, "learning_rate": 4.4040463981578695e-05, "loss": 7.4944, "step": 94000 }, { "epoch": 0.36, "learning_rate": 4.4008764321906245e-05, "loss": 7.4919, "step": 94500 }, { "epoch": 0.36, "learning_rate": 4.397706466223379e-05, "loss": 7.4955, "step": 95000 }, { "epoch": 0.36, "learning_rate": 4.394536500256133e-05, "loss": 7.5051, "step": 95500 }, { "epoch": 0.37, "learning_rate": 4.391366534288888e-05, "loss": 7.5002, "step": 96000 }, { "epoch": 0.37, "learning_rate": 4.3881965683216425e-05, "loss": 7.5037, "step": 96500 }, { "epoch": 0.37, "learning_rate": 4.3850266023543975e-05, "loss": 7.4848, "step": 97000 }, { "epoch": 0.37, "learning_rate": 4.381856636387152e-05, "loss": 7.5064, "step": 97500 }, { "epoch": 0.37, "learning_rate": 4.378686670419906e-05, "loss": 7.4905, "step": 98000 }, { "epoch": 0.37, "learning_rate": 4.375516704452661e-05, "loss": 7.4929, "step": 98500 }, { "epoch": 0.38, "learning_rate": 4.372346738485416e-05, "loss": 7.5115, "step": 99000 }, { "epoch": 0.38, "learning_rate": 4.3691767725181705e-05, "loss": 7.4942, "step": 99500 }, { "epoch": 0.38, "learning_rate": 4.3660068065509255e-05, "loss": 7.5059, "step": 100000 }, { "epoch": 0.38, "learning_rate": 4.362836840583679e-05, "loss": 7.4867, "step": 100500 }, { "epoch": 0.38, "learning_rate": 4.359666874616434e-05, "loss": 7.4966, "step": 101000 }, { "epoch": 0.39, "learning_rate": 4.356496908649189e-05, "loss": 7.4955, "step": 101500 }, { "epoch": 0.39, "learning_rate": 4.3533269426819435e-05, "loss": 7.4877, "step": 102000 }, { "epoch": 0.39, "learning_rate": 4.3501569767146985e-05, "loss": 7.4893, "step": 102500 }, { "epoch": 0.39, "learning_rate": 4.346987010747453e-05, "loss": 7.4861, "step": 103000 }, { "epoch": 0.39, "learning_rate": 4.343817044780207e-05, "loss": 7.4841, "step": 103500 }, { "epoch": 0.4, "learning_rate": 4.340647078812962e-05, "loss": 7.49, "step": 104000 }, { "epoch": 0.4, "learning_rate": 4.3374771128457165e-05, "loss": 7.4895, "step": 104500 }, { "epoch": 0.4, "learning_rate": 4.3343071468784715e-05, "loss": 7.5027, "step": 105000 }, { "epoch": 0.4, "learning_rate": 4.331137180911226e-05, "loss": 7.493, "step": 105500 }, { "epoch": 0.4, "learning_rate": 4.32796721494398e-05, "loss": 7.4999, "step": 106000 }, { "epoch": 0.41, "learning_rate": 4.324797248976735e-05, "loss": 7.5011, "step": 106500 }, { "epoch": 0.41, "learning_rate": 4.32162728300949e-05, "loss": 7.503, "step": 107000 }, { "epoch": 0.41, "learning_rate": 4.3184573170422446e-05, "loss": 7.4797, "step": 107500 }, { "epoch": 0.41, "learning_rate": 4.315287351074999e-05, "loss": 7.4771, "step": 108000 }, { "epoch": 0.41, "learning_rate": 4.312117385107753e-05, "loss": 7.5038, "step": 108500 }, { "epoch": 0.41, "learning_rate": 4.308947419140508e-05, "loss": 7.4926, "step": 109000 }, { "epoch": 0.42, "learning_rate": 4.305777453173263e-05, "loss": 7.5078, "step": 109500 }, { "epoch": 0.42, "learning_rate": 4.3026074872060176e-05, "loss": 7.4931, "step": 110000 }, { "epoch": 0.42, "learning_rate": 4.299437521238772e-05, "loss": 7.4927, "step": 110500 }, { "epoch": 0.42, "learning_rate": 4.296267555271527e-05, "loss": 7.5058, "step": 111000 }, { "epoch": 0.42, "learning_rate": 4.293097589304281e-05, "loss": 7.4955, "step": 111500 }, { "epoch": 0.43, "learning_rate": 4.289927623337036e-05, "loss": 7.4994, "step": 112000 }, { "epoch": 0.43, "learning_rate": 4.286757657369791e-05, "loss": 7.5213, "step": 112500 }, { "epoch": 0.43, "learning_rate": 4.283587691402545e-05, "loss": 7.4715, "step": 113000 }, { "epoch": 0.43, "learning_rate": 4.2804177254353e-05, "loss": 7.5062, "step": 113500 }, { "epoch": 0.43, "learning_rate": 4.277247759468054e-05, "loss": 7.5004, "step": 114000 }, { "epoch": 0.44, "learning_rate": 4.274077793500809e-05, "loss": 7.4991, "step": 114500 }, { "epoch": 0.44, "learning_rate": 4.270907827533564e-05, "loss": 7.4989, "step": 115000 }, { "epoch": 0.44, "learning_rate": 4.2677378615663186e-05, "loss": 7.4709, "step": 115500 }, { "epoch": 0.44, "learning_rate": 4.264567895599073e-05, "loss": 7.4851, "step": 116000 }, { "epoch": 0.44, "learning_rate": 4.261397929631828e-05, "loss": 7.49, "step": 116500 }, { "epoch": 0.45, "learning_rate": 4.258227963664582e-05, "loss": 7.4925, "step": 117000 }, { "epoch": 0.45, "learning_rate": 4.255057997697337e-05, "loss": 7.4997, "step": 117500 }, { "epoch": 0.45, "learning_rate": 4.2518880317300916e-05, "loss": 7.5011, "step": 118000 }, { "epoch": 0.45, "learning_rate": 4.248718065762846e-05, "loss": 7.495, "step": 118500 }, { "epoch": 0.45, "learning_rate": 4.245548099795601e-05, "loss": 7.4749, "step": 119000 }, { "epoch": 0.45, "learning_rate": 4.242378133828355e-05, "loss": 7.5037, "step": 119500 }, { "epoch": 0.46, "learning_rate": 4.23920816786111e-05, "loss": 7.4854, "step": 120000 }, { "epoch": 0.46, "learning_rate": 4.2360382018938646e-05, "loss": 7.5, "step": 120500 }, { "epoch": 0.46, "learning_rate": 4.232868235926619e-05, "loss": 7.5072, "step": 121000 }, { "epoch": 0.46, "learning_rate": 4.229698269959374e-05, "loss": 7.4904, "step": 121500 }, { "epoch": 0.46, "learning_rate": 4.226528303992128e-05, "loss": 7.4916, "step": 122000 }, { "epoch": 0.47, "learning_rate": 4.223358338024883e-05, "loss": 7.4959, "step": 122500 }, { "epoch": 0.47, "learning_rate": 4.2201883720576376e-05, "loss": 7.5056, "step": 123000 }, { "epoch": 0.47, "learning_rate": 4.217018406090392e-05, "loss": 7.4923, "step": 123500 }, { "epoch": 0.47, "learning_rate": 4.213848440123147e-05, "loss": 7.4946, "step": 124000 }, { "epoch": 0.47, "learning_rate": 4.210678474155902e-05, "loss": 7.4985, "step": 124500 }, { "epoch": 0.48, "learning_rate": 4.207508508188656e-05, "loss": 7.4835, "step": 125000 }, { "epoch": 0.48, "learning_rate": 4.2043385422214107e-05, "loss": 7.4963, "step": 125500 }, { "epoch": 0.48, "learning_rate": 4.201168576254165e-05, "loss": 7.4945, "step": 126000 }, { "epoch": 0.48, "learning_rate": 4.19799861028692e-05, "loss": 7.4981, "step": 126500 }, { "epoch": 0.48, "learning_rate": 4.194828644319675e-05, "loss": 7.4966, "step": 127000 }, { "epoch": 0.49, "learning_rate": 4.1916586783524293e-05, "loss": 7.4856, "step": 127500 }, { "epoch": 0.49, "learning_rate": 4.1884887123851843e-05, "loss": 7.4974, "step": 128000 }, { "epoch": 0.49, "learning_rate": 4.185318746417939e-05, "loss": 7.4956, "step": 128500 }, { "epoch": 0.49, "learning_rate": 4.182148780450693e-05, "loss": 7.4918, "step": 129000 }, { "epoch": 0.49, "learning_rate": 4.178978814483448e-05, "loss": 7.5015, "step": 129500 }, { "epoch": 0.49, "learning_rate": 4.175808848516203e-05, "loss": 7.4968, "step": 130000 }, { "epoch": 0.5, "learning_rate": 4.1726388825489574e-05, "loss": 7.497, "step": 130500 }, { "epoch": 0.5, "learning_rate": 4.169468916581712e-05, "loss": 7.4941, "step": 131000 }, { "epoch": 0.5, "learning_rate": 4.166298950614466e-05, "loss": 7.5052, "step": 131500 }, { "epoch": 0.5, "learning_rate": 4.163128984647221e-05, "loss": 7.4969, "step": 132000 }, { "epoch": 0.5, "learning_rate": 4.159959018679976e-05, "loss": 7.4822, "step": 132500 }, { "epoch": 0.51, "learning_rate": 4.1567890527127304e-05, "loss": 7.4896, "step": 133000 }, { "epoch": 0.51, "learning_rate": 4.153619086745485e-05, "loss": 7.5044, "step": 133500 }, { "epoch": 0.51, "learning_rate": 4.15044912077824e-05, "loss": 7.4871, "step": 134000 }, { "epoch": 0.51, "learning_rate": 4.147279154810994e-05, "loss": 7.4949, "step": 134500 }, { "epoch": 0.51, "learning_rate": 4.144109188843749e-05, "loss": 7.4831, "step": 135000 }, { "epoch": 0.52, "learning_rate": 4.1409392228765034e-05, "loss": 7.4978, "step": 135500 }, { "epoch": 0.52, "learning_rate": 4.137769256909258e-05, "loss": 7.5074, "step": 136000 }, { "epoch": 0.52, "learning_rate": 4.134599290942013e-05, "loss": 7.4895, "step": 136500 }, { "epoch": 0.52, "learning_rate": 4.131429324974767e-05, "loss": 7.4818, "step": 137000 }, { "epoch": 0.52, "learning_rate": 4.128259359007522e-05, "loss": 7.5038, "step": 137500 }, { "epoch": 0.52, "learning_rate": 4.125089393040277e-05, "loss": 7.5023, "step": 138000 }, { "epoch": 0.53, "learning_rate": 4.121919427073031e-05, "loss": 7.4888, "step": 138500 }, { "epoch": 0.53, "learning_rate": 4.118749461105786e-05, "loss": 7.4901, "step": 139000 }, { "epoch": 0.53, "learning_rate": 4.11557949513854e-05, "loss": 7.4867, "step": 139500 }, { "epoch": 0.53, "learning_rate": 4.112409529171295e-05, "loss": 7.5052, "step": 140000 }, { "epoch": 0.53, "learning_rate": 4.10923956320405e-05, "loss": 7.4933, "step": 140500 }, { "epoch": 0.54, "learning_rate": 4.106069597236804e-05, "loss": 7.4912, "step": 141000 }, { "epoch": 0.54, "learning_rate": 4.102899631269559e-05, "loss": 7.497, "step": 141500 }, { "epoch": 0.54, "learning_rate": 4.099729665302314e-05, "loss": 7.4989, "step": 142000 }, { "epoch": 0.54, "learning_rate": 4.096559699335068e-05, "loss": 7.486, "step": 142500 }, { "epoch": 0.54, "learning_rate": 4.093389733367823e-05, "loss": 7.4852, "step": 143000 }, { "epoch": 0.55, "learning_rate": 4.0902197674005774e-05, "loss": 7.4835, "step": 143500 }, { "epoch": 0.55, "learning_rate": 4.087049801433332e-05, "loss": 7.4888, "step": 144000 }, { "epoch": 0.55, "learning_rate": 4.083879835466087e-05, "loss": 7.492, "step": 144500 }, { "epoch": 0.55, "learning_rate": 4.080709869498841e-05, "loss": 7.4884, "step": 145000 }, { "epoch": 0.55, "learning_rate": 4.077539903531596e-05, "loss": 7.4859, "step": 145500 }, { "epoch": 0.56, "learning_rate": 4.0743699375643505e-05, "loss": 7.4929, "step": 146000 }, { "epoch": 0.56, "learning_rate": 4.071199971597105e-05, "loss": 7.4726, "step": 146500 }, { "epoch": 0.56, "learning_rate": 4.06803000562986e-05, "loss": 7.4954, "step": 147000 }, { "epoch": 0.56, "learning_rate": 4.064860039662615e-05, "loss": 7.4859, "step": 147500 }, { "epoch": 0.56, "learning_rate": 4.061690073695369e-05, "loss": 7.4997, "step": 148000 }, { "epoch": 0.56, "learning_rate": 4.0585201077281235e-05, "loss": 7.4929, "step": 148500 }, { "epoch": 0.57, "learning_rate": 4.055350141760878e-05, "loss": 7.5029, "step": 149000 }, { "epoch": 0.57, "learning_rate": 4.052180175793633e-05, "loss": 7.4995, "step": 149500 }, { "epoch": 0.57, "learning_rate": 4.049010209826388e-05, "loss": 7.4838, "step": 150000 }, { "epoch": 0.57, "learning_rate": 4.045840243859142e-05, "loss": 7.4977, "step": 150500 }, { "epoch": 0.57, "learning_rate": 4.0426702778918965e-05, "loss": 7.4772, "step": 151000 }, { "epoch": 0.58, "learning_rate": 4.0395003119246515e-05, "loss": 7.4779, "step": 151500 }, { "epoch": 0.58, "learning_rate": 4.036330345957406e-05, "loss": 7.4878, "step": 152000 }, { "epoch": 0.58, "learning_rate": 4.033160379990161e-05, "loss": 7.4886, "step": 152500 }, { "epoch": 0.58, "learning_rate": 4.029990414022915e-05, "loss": 7.4924, "step": 153000 }, { "epoch": 0.58, "learning_rate": 4.0268204480556695e-05, "loss": 7.4925, "step": 153500 }, { "epoch": 0.59, "learning_rate": 4.0236504820884245e-05, "loss": 7.4942, "step": 154000 }, { "epoch": 0.59, "learning_rate": 4.020480516121179e-05, "loss": 7.4917, "step": 154500 }, { "epoch": 0.59, "learning_rate": 4.017310550153934e-05, "loss": 7.4959, "step": 155000 }, { "epoch": 0.59, "learning_rate": 4.014140584186689e-05, "loss": 7.5003, "step": 155500 }, { "epoch": 0.59, "learning_rate": 4.010970618219443e-05, "loss": 7.5046, "step": 156000 }, { "epoch": 0.6, "learning_rate": 4.0078006522521975e-05, "loss": 7.4904, "step": 156500 }, { "epoch": 0.6, "learning_rate": 4.004630686284952e-05, "loss": 7.4885, "step": 157000 }, { "epoch": 0.6, "learning_rate": 4.001460720317707e-05, "loss": 7.4943, "step": 157500 }, { "epoch": 0.6, "learning_rate": 3.998290754350462e-05, "loss": 7.4867, "step": 158000 }, { "epoch": 0.6, "learning_rate": 3.995120788383216e-05, "loss": 7.4789, "step": 158500 }, { "epoch": 0.6, "learning_rate": 3.9919508224159705e-05, "loss": 7.491, "step": 159000 }, { "epoch": 0.61, "learning_rate": 3.9887808564487255e-05, "loss": 7.4918, "step": 159500 }, { "epoch": 0.61, "learning_rate": 3.98561089048148e-05, "loss": 7.4913, "step": 160000 }, { "epoch": 0.61, "learning_rate": 3.982440924514235e-05, "loss": 7.4989, "step": 160500 }, { "epoch": 0.61, "learning_rate": 3.979270958546989e-05, "loss": 7.4798, "step": 161000 }, { "epoch": 0.61, "learning_rate": 3.9761009925797435e-05, "loss": 7.4842, "step": 161500 }, { "epoch": 0.62, "learning_rate": 3.9729310266124985e-05, "loss": 7.5008, "step": 162000 }, { "epoch": 0.62, "learning_rate": 3.969761060645253e-05, "loss": 7.4923, "step": 162500 }, { "epoch": 0.62, "learning_rate": 3.966591094678008e-05, "loss": 7.4895, "step": 163000 }, { "epoch": 0.62, "learning_rate": 3.963421128710762e-05, "loss": 7.4881, "step": 163500 }, { "epoch": 0.62, "learning_rate": 3.9602511627435166e-05, "loss": 7.4888, "step": 164000 }, { "epoch": 0.63, "learning_rate": 3.9570811967762716e-05, "loss": 7.4991, "step": 164500 }, { "epoch": 0.63, "learning_rate": 3.9539112308090266e-05, "loss": 7.4991, "step": 165000 }, { "epoch": 0.63, "learning_rate": 3.950741264841781e-05, "loss": 7.4837, "step": 165500 }, { "epoch": 0.63, "learning_rate": 3.947571298874536e-05, "loss": 7.4836, "step": 166000 }, { "epoch": 0.63, "learning_rate": 3.9444013329072896e-05, "loss": 7.5083, "step": 166500 }, { "epoch": 0.64, "learning_rate": 3.9412313669400446e-05, "loss": 7.4966, "step": 167000 }, { "epoch": 0.64, "learning_rate": 3.9380614009727996e-05, "loss": 7.494, "step": 167500 }, { "epoch": 0.64, "learning_rate": 3.934891435005554e-05, "loss": 7.4945, "step": 168000 }, { "epoch": 0.64, "learning_rate": 3.931721469038309e-05, "loss": 7.4825, "step": 168500 }, { "epoch": 0.64, "learning_rate": 3.928551503071063e-05, "loss": 7.5042, "step": 169000 }, { "epoch": 0.64, "learning_rate": 3.9253815371038176e-05, "loss": 7.4825, "step": 169500 }, { "epoch": 0.65, "learning_rate": 3.9222115711365726e-05, "loss": 7.501, "step": 170000 }, { "epoch": 0.65, "learning_rate": 3.919041605169327e-05, "loss": 7.5055, "step": 170500 }, { "epoch": 0.65, "learning_rate": 3.915871639202082e-05, "loss": 7.4882, "step": 171000 }, { "epoch": 0.65, "learning_rate": 3.912701673234836e-05, "loss": 7.4768, "step": 171500 }, { "epoch": 0.65, "learning_rate": 3.9095317072675906e-05, "loss": 7.4929, "step": 172000 }, { "epoch": 0.66, "learning_rate": 3.9063617413003456e-05, "loss": 7.4906, "step": 172500 }, { "epoch": 0.66, "learning_rate": 3.9031917753331006e-05, "loss": 7.4936, "step": 173000 }, { "epoch": 0.66, "learning_rate": 3.900021809365855e-05, "loss": 7.5112, "step": 173500 }, { "epoch": 0.66, "learning_rate": 3.896851843398609e-05, "loss": 7.4947, "step": 174000 }, { "epoch": 0.66, "learning_rate": 3.8936818774313636e-05, "loss": 7.4788, "step": 174500 }, { "epoch": 0.67, "learning_rate": 3.8905119114641186e-05, "loss": 7.4922, "step": 175000 }, { "epoch": 0.67, "learning_rate": 3.8873419454968736e-05, "loss": 7.5001, "step": 175500 }, { "epoch": 0.67, "learning_rate": 3.884171979529628e-05, "loss": 7.4882, "step": 176000 }, { "epoch": 0.67, "learning_rate": 3.881002013562382e-05, "loss": 7.5024, "step": 176500 }, { "epoch": 0.67, "learning_rate": 3.877832047595137e-05, "loss": 7.4859, "step": 177000 }, { "epoch": 0.68, "learning_rate": 3.8746620816278916e-05, "loss": 7.4837, "step": 177500 }, { "epoch": 0.68, "learning_rate": 3.8714921156606466e-05, "loss": 7.5076, "step": 178000 }, { "epoch": 0.68, "learning_rate": 3.8683221496934017e-05, "loss": 7.4998, "step": 178500 }, { "epoch": 0.68, "learning_rate": 3.865152183726155e-05, "loss": 7.491, "step": 179000 }, { "epoch": 0.68, "learning_rate": 3.86198221775891e-05, "loss": 7.4899, "step": 179500 }, { "epoch": 0.68, "learning_rate": 3.8588122517916647e-05, "loss": 7.488, "step": 180000 }, { "epoch": 0.69, "learning_rate": 3.8556422858244197e-05, "loss": 7.4895, "step": 180500 }, { "epoch": 0.69, "learning_rate": 3.852472319857175e-05, "loss": 7.4976, "step": 181000 }, { "epoch": 0.69, "learning_rate": 3.849302353889928e-05, "loss": 7.505, "step": 181500 }, { "epoch": 0.69, "learning_rate": 3.846132387922683e-05, "loss": 7.4959, "step": 182000 }, { "epoch": 0.69, "learning_rate": 3.8429624219554383e-05, "loss": 7.4872, "step": 182500 }, { "epoch": 0.7, "learning_rate": 3.839792455988193e-05, "loss": 7.5021, "step": 183000 }, { "epoch": 0.7, "learning_rate": 3.836622490020948e-05, "loss": 7.4847, "step": 183500 }, { "epoch": 0.7, "learning_rate": 3.833452524053702e-05, "loss": 7.4968, "step": 184000 }, { "epoch": 0.7, "learning_rate": 3.8302825580864563e-05, "loss": 7.4772, "step": 184500 }, { "epoch": 0.7, "learning_rate": 3.8271125921192114e-05, "loss": 7.479, "step": 185000 }, { "epoch": 0.71, "learning_rate": 3.823942626151966e-05, "loss": 7.4952, "step": 185500 }, { "epoch": 0.71, "learning_rate": 3.820772660184721e-05, "loss": 7.4871, "step": 186000 }, { "epoch": 0.71, "learning_rate": 3.817602694217475e-05, "loss": 7.4915, "step": 186500 }, { "epoch": 0.71, "learning_rate": 3.8144327282502294e-05, "loss": 7.486, "step": 187000 }, { "epoch": 0.71, "learning_rate": 3.8112627622829844e-05, "loss": 7.4672, "step": 187500 }, { "epoch": 0.72, "learning_rate": 3.808092796315739e-05, "loss": 7.4786, "step": 188000 }, { "epoch": 0.72, "learning_rate": 3.804922830348494e-05, "loss": 7.4983, "step": 188500 }, { "epoch": 0.72, "learning_rate": 3.801752864381248e-05, "loss": 7.5075, "step": 189000 }, { "epoch": 0.72, "learning_rate": 3.7985828984140024e-05, "loss": 7.4893, "step": 189500 }, { "epoch": 0.72, "learning_rate": 3.7954129324467574e-05, "loss": 7.5036, "step": 190000 }, { "epoch": 0.72, "learning_rate": 3.7922429664795124e-05, "loss": 7.4761, "step": 190500 }, { "epoch": 0.73, "learning_rate": 3.789073000512267e-05, "loss": 7.4805, "step": 191000 }, { "epoch": 0.73, "learning_rate": 3.785903034545021e-05, "loss": 7.4845, "step": 191500 }, { "epoch": 0.73, "learning_rate": 3.782733068577776e-05, "loss": 7.4894, "step": 192000 }, { "epoch": 0.73, "learning_rate": 3.7795631026105304e-05, "loss": 7.4943, "step": 192500 }, { "epoch": 0.73, "learning_rate": 3.7763931366432854e-05, "loss": 7.4942, "step": 193000 }, { "epoch": 0.74, "learning_rate": 3.77322317067604e-05, "loss": 7.4964, "step": 193500 }, { "epoch": 0.74, "learning_rate": 3.770053204708795e-05, "loss": 7.4966, "step": 194000 }, { "epoch": 0.74, "learning_rate": 3.766883238741549e-05, "loss": 7.5028, "step": 194500 }, { "epoch": 0.74, "learning_rate": 3.7637132727743034e-05, "loss": 7.4922, "step": 195000 }, { "epoch": 0.74, "learning_rate": 3.7605433068070584e-05, "loss": 7.5029, "step": 195500 }, { "epoch": 0.75, "learning_rate": 3.7573733408398134e-05, "loss": 7.48, "step": 196000 }, { "epoch": 0.75, "learning_rate": 3.754203374872568e-05, "loss": 7.4906, "step": 196500 }, { "epoch": 0.75, "learning_rate": 3.751033408905322e-05, "loss": 7.4937, "step": 197000 }, { "epoch": 0.75, "learning_rate": 3.7478634429380764e-05, "loss": 7.4733, "step": 197500 }, { "epoch": 0.75, "learning_rate": 3.7446934769708314e-05, "loss": 7.5046, "step": 198000 }, { "epoch": 0.76, "learning_rate": 3.7415235110035864e-05, "loss": 7.4886, "step": 198500 }, { "epoch": 0.76, "learning_rate": 3.738353545036341e-05, "loss": 7.4883, "step": 199000 }, { "epoch": 0.76, "learning_rate": 3.735183579069095e-05, "loss": 7.4905, "step": 199500 }, { "epoch": 0.76, "learning_rate": 3.73201361310185e-05, "loss": 7.4878, "step": 200000 }, { "epoch": 0.76, "learning_rate": 3.7288436471346044e-05, "loss": 7.4861, "step": 200500 }, { "epoch": 0.76, "learning_rate": 3.7256736811673595e-05, "loss": 7.4833, "step": 201000 }, { "epoch": 0.77, "learning_rate": 3.722503715200114e-05, "loss": 7.4814, "step": 201500 }, { "epoch": 0.77, "learning_rate": 3.719333749232868e-05, "loss": 7.4885, "step": 202000 }, { "epoch": 0.77, "learning_rate": 3.716163783265623e-05, "loss": 7.4981, "step": 202500 }, { "epoch": 0.77, "learning_rate": 3.7129938172983775e-05, "loss": 7.5081, "step": 203000 }, { "epoch": 0.77, "learning_rate": 3.7098238513311325e-05, "loss": 7.5021, "step": 203500 }, { "epoch": 0.78, "learning_rate": 3.706653885363887e-05, "loss": 7.4927, "step": 204000 }, { "epoch": 0.78, "learning_rate": 3.703483919396641e-05, "loss": 7.4876, "step": 204500 }, { "epoch": 0.78, "learning_rate": 3.700313953429396e-05, "loss": 7.4899, "step": 205000 }, { "epoch": 0.78, "learning_rate": 3.6971439874621505e-05, "loss": 7.4842, "step": 205500 }, { "epoch": 0.78, "learning_rate": 3.6939740214949055e-05, "loss": 7.4956, "step": 206000 }, { "epoch": 0.79, "learning_rate": 3.6908040555276605e-05, "loss": 7.4852, "step": 206500 }, { "epoch": 0.79, "learning_rate": 3.687634089560414e-05, "loss": 7.5173, "step": 207000 }, { "epoch": 0.79, "learning_rate": 3.684464123593169e-05, "loss": 7.4874, "step": 207500 }, { "epoch": 0.79, "learning_rate": 3.681294157625924e-05, "loss": 7.4758, "step": 208000 }, { "epoch": 0.79, "learning_rate": 3.6781241916586785e-05, "loss": 7.4979, "step": 208500 }, { "epoch": 0.8, "learning_rate": 3.6749542256914335e-05, "loss": 7.4813, "step": 209000 }, { "epoch": 0.8, "learning_rate": 3.671784259724188e-05, "loss": 7.4912, "step": 209500 }, { "epoch": 0.8, "learning_rate": 3.668614293756942e-05, "loss": 7.496, "step": 210000 }, { "epoch": 0.8, "learning_rate": 3.665444327789697e-05, "loss": 7.5007, "step": 210500 }, { "epoch": 0.8, "learning_rate": 3.6622743618224515e-05, "loss": 7.4935, "step": 211000 }, { "epoch": 0.8, "learning_rate": 3.6591043958552065e-05, "loss": 7.4968, "step": 211500 }, { "epoch": 0.81, "learning_rate": 3.655934429887961e-05, "loss": 7.4897, "step": 212000 }, { "epoch": 0.81, "learning_rate": 3.652764463920715e-05, "loss": 7.4955, "step": 212500 }, { "epoch": 0.81, "learning_rate": 3.64959449795347e-05, "loss": 7.5055, "step": 213000 }, { "epoch": 0.81, "learning_rate": 3.646424531986225e-05, "loss": 7.4854, "step": 213500 }, { "epoch": 0.81, "learning_rate": 3.6432545660189795e-05, "loss": 7.5171, "step": 214000 }, { "epoch": 0.82, "learning_rate": 3.640084600051734e-05, "loss": 7.4996, "step": 214500 }, { "epoch": 0.82, "learning_rate": 3.636914634084488e-05, "loss": 7.4906, "step": 215000 }, { "epoch": 0.82, "learning_rate": 3.633744668117243e-05, "loss": 7.4752, "step": 215500 }, { "epoch": 0.82, "learning_rate": 3.630574702149998e-05, "loss": 7.4973, "step": 216000 }, { "epoch": 0.82, "learning_rate": 3.6274047361827525e-05, "loss": 7.4903, "step": 216500 }, { "epoch": 0.83, "learning_rate": 3.624234770215507e-05, "loss": 7.4801, "step": 217000 }, { "epoch": 0.83, "learning_rate": 3.621064804248262e-05, "loss": 7.4868, "step": 217500 }, { "epoch": 0.83, "learning_rate": 3.617894838281016e-05, "loss": 7.4925, "step": 218000 }, { "epoch": 0.83, "learning_rate": 3.614724872313771e-05, "loss": 7.4862, "step": 218500 }, { "epoch": 0.83, "learning_rate": 3.6115549063465256e-05, "loss": 7.4982, "step": 219000 }, { "epoch": 0.83, "learning_rate": 3.60838494037928e-05, "loss": 7.4778, "step": 219500 }, { "epoch": 0.84, "learning_rate": 3.605214974412035e-05, "loss": 7.5073, "step": 220000 }, { "epoch": 0.84, "learning_rate": 3.602045008444789e-05, "loss": 7.4798, "step": 220500 }, { "epoch": 0.84, "learning_rate": 3.598875042477544e-05, "loss": 7.4791, "step": 221000 }, { "epoch": 0.84, "learning_rate": 3.595705076510299e-05, "loss": 7.4905, "step": 221500 }, { "epoch": 0.84, "learning_rate": 3.5925351105430536e-05, "loss": 7.4877, "step": 222000 }, { "epoch": 0.85, "learning_rate": 3.589365144575808e-05, "loss": 7.4945, "step": 222500 }, { "epoch": 0.85, "learning_rate": 3.586195178608562e-05, "loss": 7.4819, "step": 223000 }, { "epoch": 0.85, "learning_rate": 3.583025212641317e-05, "loss": 7.4984, "step": 223500 }, { "epoch": 0.85, "learning_rate": 3.579855246674072e-05, "loss": 7.5007, "step": 224000 }, { "epoch": 0.85, "learning_rate": 3.5766852807068266e-05, "loss": 7.4948, "step": 224500 }, { "epoch": 0.86, "learning_rate": 3.573515314739581e-05, "loss": 7.5001, "step": 225000 }, { "epoch": 0.86, "learning_rate": 3.570345348772336e-05, "loss": 7.4981, "step": 225500 }, { "epoch": 0.86, "learning_rate": 3.56717538280509e-05, "loss": 7.4831, "step": 226000 }, { "epoch": 0.86, "learning_rate": 3.564005416837845e-05, "loss": 7.4941, "step": 226500 }, { "epoch": 0.86, "learning_rate": 3.5608354508705996e-05, "loss": 7.4782, "step": 227000 }, { "epoch": 0.87, "learning_rate": 3.557665484903354e-05, "loss": 7.4813, "step": 227500 }, { "epoch": 0.87, "learning_rate": 3.554495518936109e-05, "loss": 7.4642, "step": 228000 }, { "epoch": 0.87, "learning_rate": 3.551325552968863e-05, "loss": 7.4816, "step": 228500 }, { "epoch": 0.87, "learning_rate": 3.548155587001618e-05, "loss": 7.509, "step": 229000 }, { "epoch": 0.87, "learning_rate": 3.5449856210343726e-05, "loss": 7.4822, "step": 229500 }, { "epoch": 0.87, "learning_rate": 3.541815655067127e-05, "loss": 7.4954, "step": 230000 }, { "epoch": 0.88, "learning_rate": 3.538645689099882e-05, "loss": 7.4901, "step": 230500 }, { "epoch": 0.88, "learning_rate": 3.535475723132637e-05, "loss": 7.496, "step": 231000 }, { "epoch": 0.88, "learning_rate": 3.532305757165391e-05, "loss": 7.4993, "step": 231500 }, { "epoch": 0.88, "learning_rate": 3.5291357911981456e-05, "loss": 7.4869, "step": 232000 }, { "epoch": 0.88, "learning_rate": 3.5259658252309e-05, "loss": 7.4902, "step": 232500 }, { "epoch": 0.89, "learning_rate": 3.522795859263655e-05, "loss": 7.5146, "step": 233000 }, { "epoch": 0.89, "learning_rate": 3.51962589329641e-05, "loss": 7.4939, "step": 233500 }, { "epoch": 0.89, "learning_rate": 3.516455927329164e-05, "loss": 7.4795, "step": 234000 }, { "epoch": 0.89, "learning_rate": 3.513285961361919e-05, "loss": 7.4806, "step": 234500 }, { "epoch": 0.89, "learning_rate": 3.5101159953946737e-05, "loss": 7.4974, "step": 235000 }, { "epoch": 0.9, "learning_rate": 3.506946029427428e-05, "loss": 7.4941, "step": 235500 }, { "epoch": 0.9, "learning_rate": 3.503776063460183e-05, "loss": 7.4865, "step": 236000 }, { "epoch": 0.9, "learning_rate": 3.500606097492937e-05, "loss": 7.4884, "step": 236500 }, { "epoch": 0.9, "learning_rate": 3.497436131525692e-05, "loss": 7.4856, "step": 237000 }, { "epoch": 0.9, "learning_rate": 3.494266165558447e-05, "loss": 7.4888, "step": 237500 }, { "epoch": 0.91, "learning_rate": 3.491096199591201e-05, "loss": 7.4799, "step": 238000 }, { "epoch": 0.91, "learning_rate": 3.487926233623956e-05, "loss": 7.503, "step": 238500 }, { "epoch": 0.91, "learning_rate": 3.484756267656711e-05, "loss": 7.4893, "step": 239000 }, { "epoch": 0.91, "learning_rate": 3.4815863016894653e-05, "loss": 7.4764, "step": 239500 }, { "epoch": 0.91, "learning_rate": 3.47841633572222e-05, "loss": 7.4831, "step": 240000 }, { "epoch": 0.91, "learning_rate": 3.475246369754975e-05, "loss": 7.4935, "step": 240500 }, { "epoch": 0.92, "learning_rate": 3.472076403787729e-05, "loss": 7.4765, "step": 241000 }, { "epoch": 0.92, "learning_rate": 3.468906437820484e-05, "loss": 7.4874, "step": 241500 }, { "epoch": 0.92, "learning_rate": 3.4657364718532384e-05, "loss": 7.4747, "step": 242000 }, { "epoch": 0.92, "learning_rate": 3.462566505885993e-05, "loss": 7.4763, "step": 242500 }, { "epoch": 0.92, "learning_rate": 3.459396539918748e-05, "loss": 7.4834, "step": 243000 }, { "epoch": 0.93, "learning_rate": 3.456226573951502e-05, "loss": 7.4977, "step": 243500 }, { "epoch": 0.93, "learning_rate": 3.453056607984257e-05, "loss": 7.4831, "step": 244000 }, { "epoch": 0.93, "learning_rate": 3.449886642017012e-05, "loss": 7.4984, "step": 244500 }, { "epoch": 0.93, "learning_rate": 3.446716676049766e-05, "loss": 7.4969, "step": 245000 }, { "epoch": 0.93, "learning_rate": 3.443546710082521e-05, "loss": 7.4881, "step": 245500 }, { "epoch": 0.94, "learning_rate": 3.440376744115275e-05, "loss": 7.4912, "step": 246000 }, { "epoch": 0.94, "learning_rate": 3.43720677814803e-05, "loss": 7.4867, "step": 246500 }, { "epoch": 0.94, "learning_rate": 3.434036812180785e-05, "loss": 7.4966, "step": 247000 }, { "epoch": 0.94, "learning_rate": 3.430866846213539e-05, "loss": 7.498, "step": 247500 }, { "epoch": 0.94, "learning_rate": 3.427696880246294e-05, "loss": 7.4854, "step": 248000 }, { "epoch": 0.95, "learning_rate": 3.424526914279049e-05, "loss": 7.5006, "step": 248500 }, { "epoch": 0.95, "learning_rate": 3.421356948311803e-05, "loss": 7.4864, "step": 249000 }, { "epoch": 0.95, "learning_rate": 3.418186982344558e-05, "loss": 7.4976, "step": 249500 }, { "epoch": 0.95, "learning_rate": 3.4150170163773124e-05, "loss": 7.5078, "step": 250000 }, { "epoch": 0.95, "learning_rate": 3.411847050410067e-05, "loss": 7.4829, "step": 250500 }, { "epoch": 0.95, "learning_rate": 3.408677084442822e-05, "loss": 7.477, "step": 251000 }, { "epoch": 0.96, "learning_rate": 3.405507118475576e-05, "loss": 7.4765, "step": 251500 }, { "epoch": 0.96, "learning_rate": 3.402337152508331e-05, "loss": 7.4881, "step": 252000 }, { "epoch": 0.96, "learning_rate": 3.3991671865410854e-05, "loss": 7.481, "step": 252500 }, { "epoch": 0.96, "learning_rate": 3.39599722057384e-05, "loss": 7.4841, "step": 253000 }, { "epoch": 0.96, "learning_rate": 3.392827254606595e-05, "loss": 7.4838, "step": 253500 }, { "epoch": 0.97, "learning_rate": 3.389657288639349e-05, "loss": 7.4762, "step": 254000 }, { "epoch": 0.97, "learning_rate": 3.386487322672104e-05, "loss": 7.4998, "step": 254500 }, { "epoch": 0.97, "learning_rate": 3.3833173567048584e-05, "loss": 7.4876, "step": 255000 }, { "epoch": 0.97, "learning_rate": 3.380147390737613e-05, "loss": 7.4914, "step": 255500 }, { "epoch": 0.97, "learning_rate": 3.376977424770368e-05, "loss": 7.505, "step": 256000 }, { "epoch": 0.98, "learning_rate": 3.373807458803123e-05, "loss": 7.493, "step": 256500 }, { "epoch": 0.98, "learning_rate": 3.370637492835877e-05, "loss": 7.4727, "step": 257000 }, { "epoch": 0.98, "learning_rate": 3.3674675268686315e-05, "loss": 7.4961, "step": 257500 }, { "epoch": 0.98, "learning_rate": 3.3642975609013865e-05, "loss": 7.4797, "step": 258000 }, { "epoch": 0.98, "learning_rate": 3.361127594934141e-05, "loss": 7.4963, "step": 258500 }, { "epoch": 0.99, "learning_rate": 3.357957628966896e-05, "loss": 7.4847, "step": 259000 }, { "epoch": 0.99, "learning_rate": 3.35478766299965e-05, "loss": 7.4999, "step": 259500 }, { "epoch": 0.99, "learning_rate": 3.3516176970324045e-05, "loss": 7.4945, "step": 260000 }, { "epoch": 0.99, "learning_rate": 3.3484477310651595e-05, "loss": 7.4948, "step": 260500 }, { "epoch": 0.99, "learning_rate": 3.345277765097914e-05, "loss": 7.482, "step": 261000 }, { "epoch": 0.99, "learning_rate": 3.342107799130669e-05, "loss": 7.49, "step": 261500 }, { "epoch": 1.0, "learning_rate": 3.338937833163424e-05, "loss": 7.4875, "step": 262000 }, { "epoch": 1.0, "learning_rate": 3.335767867196178e-05, "loss": 7.4904, "step": 262500 }, { "epoch": 1.0, "learning_rate": 3.3325979012289325e-05, "loss": 7.489, "step": 263000 }, { "epoch": 1.0, "learning_rate": 3.329427935261687e-05, "loss": 7.4787, "step": 263500 }, { "epoch": 1.0, "learning_rate": 3.326257969294442e-05, "loss": 7.5075, "step": 264000 }, { "epoch": 1.01, "learning_rate": 3.323088003327197e-05, "loss": 7.4919, "step": 264500 }, { "epoch": 1.01, "learning_rate": 3.319918037359951e-05, "loss": 7.4871, "step": 265000 }, { "epoch": 1.01, "learning_rate": 3.3167480713927055e-05, "loss": 7.488, "step": 265500 }, { "epoch": 1.01, "learning_rate": 3.3135781054254605e-05, "loss": 7.505, "step": 266000 }, { "epoch": 1.01, "learning_rate": 3.310408139458215e-05, "loss": 7.5001, "step": 266500 }, { "epoch": 1.02, "learning_rate": 3.30723817349097e-05, "loss": 7.4784, "step": 267000 }, { "epoch": 1.02, "learning_rate": 3.304068207523724e-05, "loss": 7.4899, "step": 267500 }, { "epoch": 1.02, "learning_rate": 3.3008982415564785e-05, "loss": 7.4622, "step": 268000 }, { "epoch": 1.02, "learning_rate": 3.2977282755892335e-05, "loss": 7.4945, "step": 268500 }, { "epoch": 1.02, "learning_rate": 3.294558309621988e-05, "loss": 7.4948, "step": 269000 }, { "epoch": 1.03, "learning_rate": 3.291388343654743e-05, "loss": 7.4886, "step": 269500 }, { "epoch": 1.03, "learning_rate": 3.288218377687497e-05, "loss": 7.4796, "step": 270000 }, { "epoch": 1.03, "learning_rate": 3.2850484117202515e-05, "loss": 7.4996, "step": 270500 }, { "epoch": 1.03, "learning_rate": 3.2818784457530065e-05, "loss": 7.4927, "step": 271000 }, { "epoch": 1.03, "learning_rate": 3.278708479785761e-05, "loss": 7.4866, "step": 271500 }, { "epoch": 1.03, "learning_rate": 3.275538513818516e-05, "loss": 7.5126, "step": 272000 }, { "epoch": 1.04, "learning_rate": 3.272368547851271e-05, "loss": 7.4901, "step": 272500 }, { "epoch": 1.04, "learning_rate": 3.2691985818840245e-05, "loss": 7.4772, "step": 273000 }, { "epoch": 1.04, "learning_rate": 3.2660286159167795e-05, "loss": 7.4876, "step": 273500 }, { "epoch": 1.04, "learning_rate": 3.2628586499495346e-05, "loss": 7.4819, "step": 274000 }, { "epoch": 1.04, "learning_rate": 3.259688683982289e-05, "loss": 7.4863, "step": 274500 }, { "epoch": 1.05, "learning_rate": 3.256518718015044e-05, "loss": 7.4794, "step": 275000 }, { "epoch": 1.05, "learning_rate": 3.253348752047798e-05, "loss": 7.4968, "step": 275500 }, { "epoch": 1.05, "learning_rate": 3.2501787860805526e-05, "loss": 7.4921, "step": 276000 }, { "epoch": 1.05, "learning_rate": 3.2470088201133076e-05, "loss": 7.4844, "step": 276500 }, { "epoch": 1.05, "learning_rate": 3.243838854146062e-05, "loss": 7.5079, "step": 277000 }, { "epoch": 1.06, "learning_rate": 3.240668888178817e-05, "loss": 7.4975, "step": 277500 }, { "epoch": 1.06, "learning_rate": 3.237498922211571e-05, "loss": 7.4969, "step": 278000 }, { "epoch": 1.06, "learning_rate": 3.2343289562443256e-05, "loss": 7.5057, "step": 278500 }, { "epoch": 1.06, "learning_rate": 3.2311589902770806e-05, "loss": 7.4997, "step": 279000 }, { "epoch": 1.06, "learning_rate": 3.2279890243098356e-05, "loss": 7.4847, "step": 279500 }, { "epoch": 1.07, "learning_rate": 3.22481905834259e-05, "loss": 7.4911, "step": 280000 }, { "epoch": 1.07, "learning_rate": 3.221649092375344e-05, "loss": 7.5051, "step": 280500 }, { "epoch": 1.07, "learning_rate": 3.2184791264080986e-05, "loss": 7.4742, "step": 281000 }, { "epoch": 1.07, "learning_rate": 3.2153091604408536e-05, "loss": 7.4969, "step": 281500 }, { "epoch": 1.07, "learning_rate": 3.2121391944736086e-05, "loss": 7.4884, "step": 282000 }, { "epoch": 1.07, "learning_rate": 3.208969228506363e-05, "loss": 7.4978, "step": 282500 }, { "epoch": 1.08, "learning_rate": 3.205799262539117e-05, "loss": 7.4849, "step": 283000 }, { "epoch": 1.08, "learning_rate": 3.202629296571872e-05, "loss": 7.4939, "step": 283500 }, { "epoch": 1.08, "learning_rate": 3.1994593306046266e-05, "loss": 7.4986, "step": 284000 }, { "epoch": 1.08, "learning_rate": 3.1962893646373816e-05, "loss": 7.4876, "step": 284500 }, { "epoch": 1.08, "learning_rate": 3.193119398670136e-05, "loss": 7.4895, "step": 285000 }, { "epoch": 1.09, "learning_rate": 3.18994943270289e-05, "loss": 7.49, "step": 285500 }, { "epoch": 1.09, "learning_rate": 3.186779466735645e-05, "loss": 7.4858, "step": 286000 }, { "epoch": 1.09, "learning_rate": 3.1836095007683996e-05, "loss": 7.492, "step": 286500 }, { "epoch": 1.09, "learning_rate": 3.1804395348011546e-05, "loss": 7.4854, "step": 287000 }, { "epoch": 1.09, "learning_rate": 3.1772695688339096e-05, "loss": 7.4993, "step": 287500 }, { "epoch": 1.1, "learning_rate": 3.174099602866663e-05, "loss": 7.4795, "step": 288000 }, { "epoch": 1.1, "learning_rate": 3.170929636899418e-05, "loss": 7.4866, "step": 288500 }, { "epoch": 1.1, "learning_rate": 3.167759670932173e-05, "loss": 7.4897, "step": 289000 }, { "epoch": 1.1, "learning_rate": 3.1645897049649276e-05, "loss": 7.4926, "step": 289500 }, { "epoch": 1.1, "learning_rate": 3.1614197389976827e-05, "loss": 7.4848, "step": 290000 }, { "epoch": 1.11, "learning_rate": 3.158249773030437e-05, "loss": 7.4971, "step": 290500 }, { "epoch": 1.11, "learning_rate": 3.155079807063191e-05, "loss": 7.4849, "step": 291000 }, { "epoch": 1.11, "learning_rate": 3.151909841095946e-05, "loss": 7.4962, "step": 291500 }, { "epoch": 1.11, "learning_rate": 3.1487398751287007e-05, "loss": 7.4818, "step": 292000 }, { "epoch": 1.11, "learning_rate": 3.145569909161456e-05, "loss": 7.4792, "step": 292500 }, { "epoch": 1.11, "learning_rate": 3.14239994319421e-05, "loss": 7.4821, "step": 293000 }, { "epoch": 1.12, "learning_rate": 3.139229977226964e-05, "loss": 7.4866, "step": 293500 }, { "epoch": 1.12, "learning_rate": 3.1360600112597193e-05, "loss": 7.4831, "step": 294000 }, { "epoch": 1.12, "learning_rate": 3.132890045292474e-05, "loss": 7.481, "step": 294500 }, { "epoch": 1.12, "learning_rate": 3.129720079325229e-05, "loss": 7.495, "step": 295000 }, { "epoch": 1.12, "learning_rate": 3.126550113357983e-05, "loss": 7.4764, "step": 295500 }, { "epoch": 1.13, "learning_rate": 3.1233801473907373e-05, "loss": 7.5023, "step": 296000 }, { "epoch": 1.13, "learning_rate": 3.1202101814234924e-05, "loss": 7.5015, "step": 296500 }, { "epoch": 1.13, "learning_rate": 3.1170402154562474e-05, "loss": 7.4767, "step": 297000 }, { "epoch": 1.13, "learning_rate": 3.113870249489002e-05, "loss": 7.4763, "step": 297500 }, { "epoch": 1.13, "learning_rate": 3.110700283521756e-05, "loss": 7.4957, "step": 298000 }, { "epoch": 1.14, "learning_rate": 3.1075303175545104e-05, "loss": 7.5016, "step": 298500 }, { "epoch": 1.14, "learning_rate": 3.1043603515872654e-05, "loss": 7.4787, "step": 299000 }, { "epoch": 1.14, "learning_rate": 3.1011903856200204e-05, "loss": 7.484, "step": 299500 }, { "epoch": 1.14, "learning_rate": 3.098020419652775e-05, "loss": 7.4945, "step": 300000 }, { "epoch": 1.14, "learning_rate": 3.09485045368553e-05, "loss": 7.5016, "step": 300500 }, { "epoch": 1.14, "learning_rate": 3.091680487718284e-05, "loss": 7.4848, "step": 301000 }, { "epoch": 1.15, "learning_rate": 3.0885105217510384e-05, "loss": 7.4909, "step": 301500 }, { "epoch": 1.15, "learning_rate": 3.0853405557837934e-05, "loss": 7.4784, "step": 302000 }, { "epoch": 1.15, "learning_rate": 3.082170589816548e-05, "loss": 7.4951, "step": 302500 }, { "epoch": 1.15, "learning_rate": 3.079000623849303e-05, "loss": 7.4843, "step": 303000 }, { "epoch": 1.15, "learning_rate": 3.075830657882057e-05, "loss": 7.5043, "step": 303500 }, { "epoch": 1.16, "learning_rate": 3.0726606919148114e-05, "loss": 7.4835, "step": 304000 }, { "epoch": 1.16, "learning_rate": 3.0694907259475664e-05, "loss": 7.4967, "step": 304500 }, { "epoch": 1.16, "learning_rate": 3.0663207599803214e-05, "loss": 7.4946, "step": 305000 }, { "epoch": 1.16, "learning_rate": 3.063150794013076e-05, "loss": 7.4888, "step": 305500 }, { "epoch": 1.16, "learning_rate": 3.05998082804583e-05, "loss": 7.4794, "step": 306000 }, { "epoch": 1.17, "learning_rate": 3.056810862078585e-05, "loss": 7.4754, "step": 306500 }, { "epoch": 1.17, "learning_rate": 3.0536408961113394e-05, "loss": 7.494, "step": 307000 }, { "epoch": 1.17, "learning_rate": 3.050470930144094e-05, "loss": 7.4847, "step": 307500 }, { "epoch": 1.17, "learning_rate": 3.0473009641768484e-05, "loss": 7.4921, "step": 308000 }, { "epoch": 1.17, "learning_rate": 3.0441309982096034e-05, "loss": 7.4799, "step": 308500 }, { "epoch": 1.18, "learning_rate": 3.040961032242358e-05, "loss": 7.5017, "step": 309000 }, { "epoch": 1.18, "learning_rate": 3.0377910662751124e-05, "loss": 7.4774, "step": 309500 }, { "epoch": 1.18, "learning_rate": 3.0346211003078674e-05, "loss": 7.4762, "step": 310000 }, { "epoch": 1.18, "learning_rate": 3.031451134340622e-05, "loss": 7.4793, "step": 310500 }, { "epoch": 1.18, "learning_rate": 3.0282811683733764e-05, "loss": 7.5039, "step": 311000 }, { "epoch": 1.18, "learning_rate": 3.025111202406131e-05, "loss": 7.4839, "step": 311500 }, { "epoch": 1.19, "learning_rate": 3.0219412364388854e-05, "loss": 7.484, "step": 312000 }, { "epoch": 1.19, "learning_rate": 3.0187712704716405e-05, "loss": 7.4838, "step": 312500 }, { "epoch": 1.19, "learning_rate": 3.015601304504395e-05, "loss": 7.4876, "step": 313000 }, { "epoch": 1.19, "learning_rate": 3.0124313385371495e-05, "loss": 7.4747, "step": 313500 }, { "epoch": 1.19, "learning_rate": 3.009261372569904e-05, "loss": 7.4817, "step": 314000 }, { "epoch": 1.2, "learning_rate": 3.006091406602659e-05, "loss": 7.4912, "step": 314500 }, { "epoch": 1.2, "learning_rate": 3.0029214406354135e-05, "loss": 7.4828, "step": 315000 }, { "epoch": 1.2, "learning_rate": 2.999751474668168e-05, "loss": 7.4845, "step": 315500 }, { "epoch": 1.2, "learning_rate": 2.9965815087009225e-05, "loss": 7.4827, "step": 316000 }, { "epoch": 1.2, "learning_rate": 2.993411542733677e-05, "loss": 7.4897, "step": 316500 }, { "epoch": 1.21, "learning_rate": 2.990241576766432e-05, "loss": 7.4898, "step": 317000 }, { "epoch": 1.21, "learning_rate": 2.9870716107991865e-05, "loss": 7.4848, "step": 317500 }, { "epoch": 1.21, "learning_rate": 2.983901644831941e-05, "loss": 7.4849, "step": 318000 }, { "epoch": 1.21, "learning_rate": 2.980731678864696e-05, "loss": 7.484, "step": 318500 }, { "epoch": 1.21, "learning_rate": 2.9775617128974505e-05, "loss": 7.4866, "step": 319000 }, { "epoch": 1.22, "learning_rate": 2.974391746930205e-05, "loss": 7.4855, "step": 319500 }, { "epoch": 1.22, "learning_rate": 2.97122178096296e-05, "loss": 7.483, "step": 320000 }, { "epoch": 1.22, "learning_rate": 2.968051814995714e-05, "loss": 7.4946, "step": 320500 }, { "epoch": 1.22, "learning_rate": 2.9648818490284692e-05, "loss": 7.4954, "step": 321000 }, { "epoch": 1.22, "learning_rate": 2.9617118830612235e-05, "loss": 7.4927, "step": 321500 }, { "epoch": 1.22, "learning_rate": 2.9585419170939782e-05, "loss": 7.4893, "step": 322000 }, { "epoch": 1.23, "learning_rate": 2.9553719511267332e-05, "loss": 7.4781, "step": 322500 }, { "epoch": 1.23, "learning_rate": 2.9522019851594872e-05, "loss": 7.4867, "step": 323000 }, { "epoch": 1.23, "learning_rate": 2.9490320191922422e-05, "loss": 7.489, "step": 323500 }, { "epoch": 1.23, "learning_rate": 2.945862053224997e-05, "loss": 7.4847, "step": 324000 }, { "epoch": 1.23, "learning_rate": 2.9426920872577512e-05, "loss": 7.4949, "step": 324500 }, { "epoch": 1.24, "learning_rate": 2.9395221212905062e-05, "loss": 7.4837, "step": 325000 }, { "epoch": 1.24, "learning_rate": 2.9363521553232602e-05, "loss": 7.4915, "step": 325500 }, { "epoch": 1.24, "learning_rate": 2.9331821893560152e-05, "loss": 7.5058, "step": 326000 }, { "epoch": 1.24, "learning_rate": 2.93001222338877e-05, "loss": 7.4839, "step": 326500 }, { "epoch": 1.24, "learning_rate": 2.9268422574215242e-05, "loss": 7.5023, "step": 327000 }, { "epoch": 1.25, "learning_rate": 2.9236722914542792e-05, "loss": 7.4928, "step": 327500 }, { "epoch": 1.25, "learning_rate": 2.920502325487034e-05, "loss": 7.4934, "step": 328000 }, { "epoch": 1.25, "learning_rate": 2.9173323595197882e-05, "loss": 7.4933, "step": 328500 }, { "epoch": 1.25, "learning_rate": 2.9141623935525432e-05, "loss": 7.4961, "step": 329000 }, { "epoch": 1.25, "learning_rate": 2.9109924275852972e-05, "loss": 7.4945, "step": 329500 }, { "epoch": 1.26, "learning_rate": 2.9078224616180522e-05, "loss": 7.4747, "step": 330000 }, { "epoch": 1.26, "learning_rate": 2.904652495650807e-05, "loss": 7.4819, "step": 330500 }, { "epoch": 1.26, "learning_rate": 2.9014825296835612e-05, "loss": 7.4906, "step": 331000 }, { "epoch": 1.26, "learning_rate": 2.8983125637163162e-05, "loss": 7.4994, "step": 331500 }, { "epoch": 1.26, "learning_rate": 2.895142597749071e-05, "loss": 7.4856, "step": 332000 }, { "epoch": 1.26, "learning_rate": 2.8919726317818252e-05, "loss": 7.4877, "step": 332500 }, { "epoch": 1.27, "learning_rate": 2.88880266581458e-05, "loss": 7.487, "step": 333000 }, { "epoch": 1.27, "learning_rate": 2.8856326998473342e-05, "loss": 7.4821, "step": 333500 }, { "epoch": 1.27, "learning_rate": 2.8824627338800892e-05, "loss": 7.4815, "step": 334000 }, { "epoch": 1.27, "learning_rate": 2.879292767912844e-05, "loss": 7.4797, "step": 334500 }, { "epoch": 1.27, "learning_rate": 2.8761228019455983e-05, "loss": 7.4878, "step": 335000 }, { "epoch": 1.28, "learning_rate": 2.872952835978353e-05, "loss": 7.4823, "step": 335500 }, { "epoch": 1.28, "learning_rate": 2.869782870011108e-05, "loss": 7.4816, "step": 336000 }, { "epoch": 1.28, "learning_rate": 2.8666129040438623e-05, "loss": 7.4992, "step": 336500 }, { "epoch": 1.28, "learning_rate": 2.863442938076617e-05, "loss": 7.4795, "step": 337000 }, { "epoch": 1.28, "learning_rate": 2.860272972109372e-05, "loss": 7.4896, "step": 337500 }, { "epoch": 1.29, "learning_rate": 2.8571030061421263e-05, "loss": 7.4846, "step": 338000 }, { "epoch": 1.29, "learning_rate": 2.853933040174881e-05, "loss": 7.4808, "step": 338500 }, { "epoch": 1.29, "learning_rate": 2.8507630742076353e-05, "loss": 7.4826, "step": 339000 }, { "epoch": 1.29, "learning_rate": 2.84759310824039e-05, "loss": 7.4996, "step": 339500 }, { "epoch": 1.29, "learning_rate": 2.844423142273145e-05, "loss": 7.492, "step": 340000 }, { "epoch": 1.3, "learning_rate": 2.8412531763058993e-05, "loss": 7.4846, "step": 340500 }, { "epoch": 1.3, "learning_rate": 2.838083210338654e-05, "loss": 7.4898, "step": 341000 }, { "epoch": 1.3, "learning_rate": 2.834913244371409e-05, "loss": 7.5043, "step": 341500 }, { "epoch": 1.3, "learning_rate": 2.831743278404163e-05, "loss": 7.4931, "step": 342000 }, { "epoch": 1.3, "learning_rate": 2.828573312436918e-05, "loss": 7.4835, "step": 342500 }, { "epoch": 1.3, "learning_rate": 2.8254033464696723e-05, "loss": 7.4702, "step": 343000 }, { "epoch": 1.31, "learning_rate": 2.822233380502427e-05, "loss": 7.4893, "step": 343500 }, { "epoch": 1.31, "learning_rate": 2.819063414535182e-05, "loss": 7.4891, "step": 344000 }, { "epoch": 1.31, "learning_rate": 2.815893448567936e-05, "loss": 7.4892, "step": 344500 }, { "epoch": 1.31, "learning_rate": 2.812723482600691e-05, "loss": 7.493, "step": 345000 }, { "epoch": 1.31, "learning_rate": 2.8095535166334457e-05, "loss": 7.4759, "step": 345500 }, { "epoch": 1.32, "learning_rate": 2.8063835506662e-05, "loss": 7.4783, "step": 346000 }, { "epoch": 1.32, "learning_rate": 2.803213584698955e-05, "loss": 7.4879, "step": 346500 }, { "epoch": 1.32, "learning_rate": 2.8000436187317093e-05, "loss": 7.4805, "step": 347000 }, { "epoch": 1.32, "learning_rate": 2.796873652764464e-05, "loss": 7.4743, "step": 347500 }, { "epoch": 1.32, "learning_rate": 2.7937036867972187e-05, "loss": 7.4829, "step": 348000 }, { "epoch": 1.33, "learning_rate": 2.790533720829973e-05, "loss": 7.4825, "step": 348500 }, { "epoch": 1.33, "learning_rate": 2.787363754862728e-05, "loss": 7.4946, "step": 349000 }, { "epoch": 1.33, "learning_rate": 2.7841937888954827e-05, "loss": 7.4916, "step": 349500 }, { "epoch": 1.33, "learning_rate": 2.781023822928237e-05, "loss": 7.4735, "step": 350000 }, { "epoch": 1.33, "learning_rate": 2.777853856960992e-05, "loss": 7.4845, "step": 350500 }, { "epoch": 1.34, "learning_rate": 2.774683890993746e-05, "loss": 7.4936, "step": 351000 }, { "epoch": 1.34, "learning_rate": 2.771513925026501e-05, "loss": 7.4954, "step": 351500 }, { "epoch": 1.34, "learning_rate": 2.7683439590592557e-05, "loss": 7.4748, "step": 352000 }, { "epoch": 1.34, "learning_rate": 2.76517399309201e-05, "loss": 7.5012, "step": 352500 }, { "epoch": 1.34, "learning_rate": 2.762004027124765e-05, "loss": 7.4788, "step": 353000 }, { "epoch": 1.34, "learning_rate": 2.7588340611575197e-05, "loss": 7.487, "step": 353500 }, { "epoch": 1.35, "learning_rate": 2.755664095190274e-05, "loss": 7.4891, "step": 354000 }, { "epoch": 1.35, "learning_rate": 2.7524941292230287e-05, "loss": 7.4856, "step": 354500 }, { "epoch": 1.35, "learning_rate": 2.7493241632557837e-05, "loss": 7.4819, "step": 355000 }, { "epoch": 1.35, "learning_rate": 2.746154197288538e-05, "loss": 7.4803, "step": 355500 }, { "epoch": 1.35, "learning_rate": 2.7429842313212927e-05, "loss": 7.4736, "step": 356000 }, { "epoch": 1.36, "learning_rate": 2.739814265354047e-05, "loss": 7.4798, "step": 356500 }, { "epoch": 1.36, "learning_rate": 2.736644299386802e-05, "loss": 7.5073, "step": 357000 }, { "epoch": 1.36, "learning_rate": 2.7334743334195567e-05, "loss": 7.4815, "step": 357500 }, { "epoch": 1.36, "learning_rate": 2.730304367452311e-05, "loss": 7.4827, "step": 358000 }, { "epoch": 1.36, "learning_rate": 2.7271344014850657e-05, "loss": 7.4744, "step": 358500 }, { "epoch": 1.37, "learning_rate": 2.7239644355178207e-05, "loss": 7.4977, "step": 359000 }, { "epoch": 1.37, "learning_rate": 2.720794469550575e-05, "loss": 7.4811, "step": 359500 }, { "epoch": 1.37, "learning_rate": 2.7176245035833297e-05, "loss": 7.4886, "step": 360000 }, { "epoch": 1.37, "learning_rate": 2.714454537616084e-05, "loss": 7.4825, "step": 360500 }, { "epoch": 1.37, "learning_rate": 2.7112845716488387e-05, "loss": 7.4851, "step": 361000 }, { "epoch": 1.38, "learning_rate": 2.7081146056815938e-05, "loss": 7.5017, "step": 361500 }, { "epoch": 1.38, "learning_rate": 2.704944639714348e-05, "loss": 7.4827, "step": 362000 }, { "epoch": 1.38, "learning_rate": 2.7017746737471028e-05, "loss": 7.4915, "step": 362500 }, { "epoch": 1.38, "learning_rate": 2.6986047077798578e-05, "loss": 7.501, "step": 363000 }, { "epoch": 1.38, "learning_rate": 2.6954347418126118e-05, "loss": 7.4834, "step": 363500 }, { "epoch": 1.38, "learning_rate": 2.6922647758453668e-05, "loss": 7.484, "step": 364000 }, { "epoch": 1.39, "learning_rate": 2.689094809878121e-05, "loss": 7.4934, "step": 364500 }, { "epoch": 1.39, "learning_rate": 2.6859248439108758e-05, "loss": 7.4851, "step": 365000 }, { "epoch": 1.39, "learning_rate": 2.6827548779436308e-05, "loss": 7.4833, "step": 365500 }, { "epoch": 1.39, "learning_rate": 2.679584911976385e-05, "loss": 7.4964, "step": 366000 }, { "epoch": 1.39, "learning_rate": 2.6764149460091398e-05, "loss": 7.4807, "step": 366500 }, { "epoch": 1.4, "learning_rate": 2.6732449800418944e-05, "loss": 7.4819, "step": 367000 }, { "epoch": 1.4, "learning_rate": 2.6700750140746488e-05, "loss": 7.4797, "step": 367500 }, { "epoch": 1.4, "learning_rate": 2.6669050481074038e-05, "loss": 7.4831, "step": 368000 }, { "epoch": 1.4, "learning_rate": 2.6637350821401585e-05, "loss": 7.4914, "step": 368500 }, { "epoch": 1.4, "learning_rate": 2.6605651161729128e-05, "loss": 7.4967, "step": 369000 }, { "epoch": 1.41, "learning_rate": 2.6573951502056678e-05, "loss": 7.4916, "step": 369500 }, { "epoch": 1.41, "learning_rate": 2.6542251842384218e-05, "loss": 7.4784, "step": 370000 }, { "epoch": 1.41, "learning_rate": 2.6510552182711768e-05, "loss": 7.507, "step": 370500 }, { "epoch": 1.41, "learning_rate": 2.6478852523039315e-05, "loss": 7.4964, "step": 371000 }, { "epoch": 1.41, "learning_rate": 2.6447152863366858e-05, "loss": 7.4925, "step": 371500 }, { "epoch": 1.42, "learning_rate": 2.6415453203694408e-05, "loss": 7.4782, "step": 372000 }, { "epoch": 1.42, "learning_rate": 2.6383753544021955e-05, "loss": 7.4683, "step": 372500 }, { "epoch": 1.42, "learning_rate": 2.6352053884349498e-05, "loss": 7.4785, "step": 373000 }, { "epoch": 1.42, "learning_rate": 2.6320354224677045e-05, "loss": 7.4876, "step": 373500 }, { "epoch": 1.42, "learning_rate": 2.6288654565004588e-05, "loss": 7.4889, "step": 374000 }, { "epoch": 1.42, "learning_rate": 2.6256954905332138e-05, "loss": 7.4773, "step": 374500 }, { "epoch": 1.43, "learning_rate": 2.6225255245659685e-05, "loss": 7.4971, "step": 375000 }, { "epoch": 1.43, "learning_rate": 2.6193555585987228e-05, "loss": 7.4915, "step": 375500 }, { "epoch": 1.43, "learning_rate": 2.6161855926314775e-05, "loss": 7.5012, "step": 376000 }, { "epoch": 1.43, "learning_rate": 2.6130156266642325e-05, "loss": 7.488, "step": 376500 }, { "epoch": 1.43, "learning_rate": 2.609845660696987e-05, "loss": 7.5013, "step": 377000 }, { "epoch": 1.44, "learning_rate": 2.6066756947297415e-05, "loss": 7.4978, "step": 377500 }, { "epoch": 1.44, "learning_rate": 2.603505728762496e-05, "loss": 7.489, "step": 378000 }, { "epoch": 1.44, "learning_rate": 2.600335762795251e-05, "loss": 7.4857, "step": 378500 }, { "epoch": 1.44, "learning_rate": 2.5971657968280055e-05, "loss": 7.4906, "step": 379000 }, { "epoch": 1.44, "learning_rate": 2.59399583086076e-05, "loss": 7.486, "step": 379500 }, { "epoch": 1.45, "learning_rate": 2.5908258648935145e-05, "loss": 7.4792, "step": 380000 }, { "epoch": 1.45, "learning_rate": 2.5876558989262695e-05, "loss": 7.4805, "step": 380500 }, { "epoch": 1.45, "learning_rate": 2.584485932959024e-05, "loss": 7.4858, "step": 381000 }, { "epoch": 1.45, "learning_rate": 2.5813159669917785e-05, "loss": 7.5006, "step": 381500 }, { "epoch": 1.45, "learning_rate": 2.578146001024533e-05, "loss": 7.4816, "step": 382000 }, { "epoch": 1.46, "learning_rate": 2.5749760350572875e-05, "loss": 7.4938, "step": 382500 }, { "epoch": 1.46, "learning_rate": 2.5718060690900425e-05, "loss": 7.4812, "step": 383000 }, { "epoch": 1.46, "learning_rate": 2.568636103122797e-05, "loss": 7.4793, "step": 383500 }, { "epoch": 1.46, "learning_rate": 2.5654661371555515e-05, "loss": 7.4912, "step": 384000 }, { "epoch": 1.46, "learning_rate": 2.5622961711883066e-05, "loss": 7.4782, "step": 384500 }, { "epoch": 1.46, "learning_rate": 2.559126205221061e-05, "loss": 7.4864, "step": 385000 }, { "epoch": 1.47, "learning_rate": 2.5559562392538156e-05, "loss": 7.4904, "step": 385500 }, { "epoch": 1.47, "learning_rate": 2.5527862732865702e-05, "loss": 7.4791, "step": 386000 }, { "epoch": 1.47, "learning_rate": 2.5496163073193246e-05, "loss": 7.4856, "step": 386500 }, { "epoch": 1.47, "learning_rate": 2.5464463413520796e-05, "loss": 7.4887, "step": 387000 }, { "epoch": 1.47, "learning_rate": 2.543276375384834e-05, "loss": 7.4914, "step": 387500 }, { "epoch": 1.48, "learning_rate": 2.5401064094175886e-05, "loss": 7.4958, "step": 388000 }, { "epoch": 1.48, "learning_rate": 2.5369364434503436e-05, "loss": 7.4776, "step": 388500 }, { "epoch": 1.48, "learning_rate": 2.5337664774830976e-05, "loss": 7.4802, "step": 389000 }, { "epoch": 1.48, "learning_rate": 2.5305965115158526e-05, "loss": 7.4843, "step": 389500 }, { "epoch": 1.48, "learning_rate": 2.5274265455486073e-05, "loss": 7.4739, "step": 390000 }, { "epoch": 1.49, "learning_rate": 2.5242565795813616e-05, "loss": 7.4993, "step": 390500 }, { "epoch": 1.49, "learning_rate": 2.5210866136141166e-05, "loss": 7.4867, "step": 391000 }, { "epoch": 1.49, "learning_rate": 2.5179166476468706e-05, "loss": 7.5017, "step": 391500 }, { "epoch": 1.49, "learning_rate": 2.5147466816796256e-05, "loss": 7.4876, "step": 392000 }, { "epoch": 1.49, "learning_rate": 2.5115767157123803e-05, "loss": 7.5033, "step": 392500 }, { "epoch": 1.49, "learning_rate": 2.5084067497451346e-05, "loss": 7.4735, "step": 393000 }, { "epoch": 1.5, "learning_rate": 2.5052367837778896e-05, "loss": 7.4946, "step": 393500 }, { "epoch": 1.5, "learning_rate": 2.5020668178106443e-05, "loss": 7.4833, "step": 394000 }, { "epoch": 1.5, "learning_rate": 2.4988968518433986e-05, "loss": 7.4954, "step": 394500 }, { "epoch": 1.5, "learning_rate": 2.4957268858761533e-05, "loss": 7.4927, "step": 395000 }, { "epoch": 1.5, "learning_rate": 2.492556919908908e-05, "loss": 7.4963, "step": 395500 }, { "epoch": 1.51, "learning_rate": 2.4893869539416626e-05, "loss": 7.4996, "step": 396000 }, { "epoch": 1.51, "learning_rate": 2.486216987974417e-05, "loss": 7.4848, "step": 396500 }, { "epoch": 1.51, "learning_rate": 2.483047022007172e-05, "loss": 7.4834, "step": 397000 }, { "epoch": 1.51, "learning_rate": 2.4798770560399266e-05, "loss": 7.4839, "step": 397500 }, { "epoch": 1.51, "learning_rate": 2.476707090072681e-05, "loss": 7.5004, "step": 398000 }, { "epoch": 1.52, "learning_rate": 2.4735371241054356e-05, "loss": 7.4833, "step": 398500 }, { "epoch": 1.52, "learning_rate": 2.4703671581381903e-05, "loss": 7.4885, "step": 399000 }, { "epoch": 1.52, "learning_rate": 2.467197192170945e-05, "loss": 7.4587, "step": 399500 }, { "epoch": 1.52, "learning_rate": 2.4640272262036996e-05, "loss": 7.4901, "step": 400000 }, { "epoch": 1.52, "learning_rate": 2.4608572602364543e-05, "loss": 7.487, "step": 400500 }, { "epoch": 1.53, "learning_rate": 2.457687294269209e-05, "loss": 7.5021, "step": 401000 }, { "epoch": 1.53, "learning_rate": 2.4545173283019633e-05, "loss": 7.4917, "step": 401500 }, { "epoch": 1.53, "learning_rate": 2.451347362334718e-05, "loss": 7.4905, "step": 402000 }, { "epoch": 1.53, "learning_rate": 2.448177396367473e-05, "loss": 7.4921, "step": 402500 }, { "epoch": 1.53, "learning_rate": 2.4450074304002273e-05, "loss": 7.4727, "step": 403000 }, { "epoch": 1.53, "learning_rate": 2.441837464432982e-05, "loss": 7.4932, "step": 403500 }, { "epoch": 1.54, "learning_rate": 2.4386674984657363e-05, "loss": 7.4857, "step": 404000 }, { "epoch": 1.54, "learning_rate": 2.4354975324984913e-05, "loss": 7.4678, "step": 404500 }, { "epoch": 1.54, "learning_rate": 2.432327566531246e-05, "loss": 7.5037, "step": 405000 }, { "epoch": 1.54, "learning_rate": 2.4291576005640003e-05, "loss": 7.4715, "step": 405500 }, { "epoch": 1.54, "learning_rate": 2.425987634596755e-05, "loss": 7.4757, "step": 406000 }, { "epoch": 1.55, "learning_rate": 2.4228176686295097e-05, "loss": 7.4853, "step": 406500 }, { "epoch": 1.55, "learning_rate": 2.4196477026622644e-05, "loss": 7.4942, "step": 407000 }, { "epoch": 1.55, "learning_rate": 2.416477736695019e-05, "loss": 7.4944, "step": 407500 }, { "epoch": 1.55, "learning_rate": 2.4133077707277734e-05, "loss": 7.4825, "step": 408000 }, { "epoch": 1.55, "learning_rate": 2.4101378047605284e-05, "loss": 7.4988, "step": 408500 }, { "epoch": 1.56, "learning_rate": 2.4069678387932827e-05, "loss": 7.4786, "step": 409000 }, { "epoch": 1.56, "learning_rate": 2.4037978728260374e-05, "loss": 7.4794, "step": 409500 }, { "epoch": 1.56, "learning_rate": 2.400627906858792e-05, "loss": 7.4878, "step": 410000 }, { "epoch": 1.56, "learning_rate": 2.3974579408915467e-05, "loss": 7.4908, "step": 410500 }, { "epoch": 1.56, "learning_rate": 2.3942879749243014e-05, "loss": 7.4775, "step": 411000 }, { "epoch": 1.57, "learning_rate": 2.391118008957056e-05, "loss": 7.4931, "step": 411500 }, { "epoch": 1.57, "learning_rate": 2.3879480429898104e-05, "loss": 7.479, "step": 412000 }, { "epoch": 1.57, "learning_rate": 2.3847780770225654e-05, "loss": 7.4955, "step": 412500 }, { "epoch": 1.57, "learning_rate": 2.3816081110553197e-05, "loss": 7.4721, "step": 413000 }, { "epoch": 1.57, "learning_rate": 2.3784381450880744e-05, "loss": 7.4964, "step": 413500 }, { "epoch": 1.57, "learning_rate": 2.375268179120829e-05, "loss": 7.4841, "step": 414000 }, { "epoch": 1.58, "learning_rate": 2.3720982131535837e-05, "loss": 7.4801, "step": 414500 }, { "epoch": 1.58, "learning_rate": 2.3689282471863384e-05, "loss": 7.4837, "step": 415000 }, { "epoch": 1.58, "learning_rate": 2.3657582812190927e-05, "loss": 7.4862, "step": 415500 }, { "epoch": 1.58, "learning_rate": 2.3625883152518477e-05, "loss": 7.4814, "step": 416000 }, { "epoch": 1.58, "learning_rate": 2.3594183492846024e-05, "loss": 7.4863, "step": 416500 }, { "epoch": 1.59, "learning_rate": 2.3562483833173567e-05, "loss": 7.4836, "step": 417000 }, { "epoch": 1.59, "learning_rate": 2.3530784173501114e-05, "loss": 7.4956, "step": 417500 }, { "epoch": 1.59, "learning_rate": 2.349908451382866e-05, "loss": 7.4953, "step": 418000 }, { "epoch": 1.59, "learning_rate": 2.3467384854156208e-05, "loss": 7.4966, "step": 418500 }, { "epoch": 1.59, "learning_rate": 2.3435685194483754e-05, "loss": 7.4903, "step": 419000 }, { "epoch": 1.6, "learning_rate": 2.3403985534811298e-05, "loss": 7.491, "step": 419500 }, { "epoch": 1.6, "learning_rate": 2.3372285875138848e-05, "loss": 7.4829, "step": 420000 }, { "epoch": 1.6, "learning_rate": 2.334058621546639e-05, "loss": 7.4785, "step": 420500 }, { "epoch": 1.6, "learning_rate": 2.3308886555793938e-05, "loss": 7.4789, "step": 421000 }, { "epoch": 1.6, "learning_rate": 2.3277186896121484e-05, "loss": 7.4857, "step": 421500 }, { "epoch": 1.61, "learning_rate": 2.324548723644903e-05, "loss": 7.4795, "step": 422000 }, { "epoch": 1.61, "learning_rate": 2.3213787576776578e-05, "loss": 7.4787, "step": 422500 }, { "epoch": 1.61, "learning_rate": 2.318208791710412e-05, "loss": 7.4901, "step": 423000 }, { "epoch": 1.61, "learning_rate": 2.3150388257431668e-05, "loss": 7.4851, "step": 423500 }, { "epoch": 1.61, "learning_rate": 2.3118688597759218e-05, "loss": 7.4885, "step": 424000 }, { "epoch": 1.61, "learning_rate": 2.308698893808676e-05, "loss": 7.4778, "step": 424500 }, { "epoch": 1.62, "learning_rate": 2.3055289278414308e-05, "loss": 7.4719, "step": 425000 }, { "epoch": 1.62, "learning_rate": 2.3023589618741855e-05, "loss": 7.4956, "step": 425500 }, { "epoch": 1.62, "learning_rate": 2.29918899590694e-05, "loss": 7.4809, "step": 426000 }, { "epoch": 1.62, "learning_rate": 2.2960190299396948e-05, "loss": 7.4857, "step": 426500 }, { "epoch": 1.62, "learning_rate": 2.292849063972449e-05, "loss": 7.4774, "step": 427000 }, { "epoch": 1.63, "learning_rate": 2.2896790980052038e-05, "loss": 7.479, "step": 427500 }, { "epoch": 1.63, "learning_rate": 2.2865091320379585e-05, "loss": 7.4812, "step": 428000 }, { "epoch": 1.63, "learning_rate": 2.283339166070713e-05, "loss": 7.4783, "step": 428500 }, { "epoch": 1.63, "learning_rate": 2.2801692001034678e-05, "loss": 7.4832, "step": 429000 }, { "epoch": 1.63, "learning_rate": 2.276999234136222e-05, "loss": 7.4875, "step": 429500 }, { "epoch": 1.64, "learning_rate": 2.273829268168977e-05, "loss": 7.4899, "step": 430000 }, { "epoch": 1.64, "learning_rate": 2.270659302201732e-05, "loss": 7.4836, "step": 430500 }, { "epoch": 1.64, "learning_rate": 2.267489336234486e-05, "loss": 7.4894, "step": 431000 }, { "epoch": 1.64, "learning_rate": 2.2643193702672412e-05, "loss": 7.4748, "step": 431500 }, { "epoch": 1.64, "learning_rate": 2.2611494042999955e-05, "loss": 7.4979, "step": 432000 }, { "epoch": 1.65, "learning_rate": 2.2579794383327502e-05, "loss": 7.509, "step": 432500 }, { "epoch": 1.65, "learning_rate": 2.254809472365505e-05, "loss": 7.4842, "step": 433000 }, { "epoch": 1.65, "learning_rate": 2.2516395063982595e-05, "loss": 7.4776, "step": 433500 }, { "epoch": 1.65, "learning_rate": 2.2484695404310142e-05, "loss": 7.488, "step": 434000 }, { "epoch": 1.65, "learning_rate": 2.2452995744637685e-05, "loss": 7.5027, "step": 434500 }, { "epoch": 1.65, "learning_rate": 2.2421296084965232e-05, "loss": 7.4735, "step": 435000 }, { "epoch": 1.66, "learning_rate": 2.2389596425292782e-05, "loss": 7.4812, "step": 435500 }, { "epoch": 1.66, "learning_rate": 2.2357896765620325e-05, "loss": 7.5071, "step": 436000 }, { "epoch": 1.66, "learning_rate": 2.2326197105947872e-05, "loss": 7.4867, "step": 436500 }, { "epoch": 1.66, "learning_rate": 2.2294497446275415e-05, "loss": 7.4881, "step": 437000 }, { "epoch": 1.66, "learning_rate": 2.2262797786602965e-05, "loss": 7.4917, "step": 437500 }, { "epoch": 1.67, "learning_rate": 2.2231098126930512e-05, "loss": 7.4892, "step": 438000 }, { "epoch": 1.67, "learning_rate": 2.2199398467258055e-05, "loss": 7.4777, "step": 438500 }, { "epoch": 1.67, "learning_rate": 2.2167698807585602e-05, "loss": 7.5001, "step": 439000 }, { "epoch": 1.67, "learning_rate": 2.213599914791315e-05, "loss": 7.4948, "step": 439500 }, { "epoch": 1.67, "learning_rate": 2.2104299488240696e-05, "loss": 7.4712, "step": 440000 }, { "epoch": 1.68, "learning_rate": 2.2072599828568242e-05, "loss": 7.4989, "step": 440500 }, { "epoch": 1.68, "learning_rate": 2.2040900168895786e-05, "loss": 7.4885, "step": 441000 }, { "epoch": 1.68, "learning_rate": 2.2009200509223336e-05, "loss": 7.4871, "step": 441500 }, { "epoch": 1.68, "learning_rate": 2.197750084955088e-05, "loss": 7.4881, "step": 442000 }, { "epoch": 1.68, "learning_rate": 2.1945801189878426e-05, "loss": 7.4896, "step": 442500 }, { "epoch": 1.69, "learning_rate": 2.1914101530205972e-05, "loss": 7.4904, "step": 443000 }, { "epoch": 1.69, "learning_rate": 2.188240187053352e-05, "loss": 7.4898, "step": 443500 }, { "epoch": 1.69, "learning_rate": 2.1850702210861066e-05, "loss": 7.4957, "step": 444000 }, { "epoch": 1.69, "learning_rate": 2.1819002551188612e-05, "loss": 7.4865, "step": 444500 }, { "epoch": 1.69, "learning_rate": 2.1787302891516156e-05, "loss": 7.4915, "step": 445000 }, { "epoch": 1.69, "learning_rate": 2.1755603231843706e-05, "loss": 7.497, "step": 445500 }, { "epoch": 1.7, "learning_rate": 2.172390357217125e-05, "loss": 7.4874, "step": 446000 }, { "epoch": 1.7, "learning_rate": 2.1692203912498796e-05, "loss": 7.4772, "step": 446500 }, { "epoch": 1.7, "learning_rate": 2.1660504252826343e-05, "loss": 7.4974, "step": 447000 }, { "epoch": 1.7, "learning_rate": 2.162880459315389e-05, "loss": 7.4917, "step": 447500 }, { "epoch": 1.7, "learning_rate": 2.1597104933481436e-05, "loss": 7.4971, "step": 448000 }, { "epoch": 1.71, "learning_rate": 2.156540527380898e-05, "loss": 7.4751, "step": 448500 }, { "epoch": 1.71, "learning_rate": 2.153370561413653e-05, "loss": 7.4918, "step": 449000 }, { "epoch": 1.71, "learning_rate": 2.1502005954464076e-05, "loss": 7.4861, "step": 449500 }, { "epoch": 1.71, "learning_rate": 2.147030629479162e-05, "loss": 7.4963, "step": 450000 }, { "epoch": 1.71, "learning_rate": 2.1438606635119166e-05, "loss": 7.4781, "step": 450500 }, { "epoch": 1.72, "learning_rate": 2.1406906975446713e-05, "loss": 7.4913, "step": 451000 }, { "epoch": 1.72, "learning_rate": 2.137520731577426e-05, "loss": 7.4899, "step": 451500 }, { "epoch": 1.72, "learning_rate": 2.1343507656101806e-05, "loss": 7.4869, "step": 452000 }, { "epoch": 1.72, "learning_rate": 2.131180799642935e-05, "loss": 7.4908, "step": 452500 }, { "epoch": 1.72, "learning_rate": 2.12801083367569e-05, "loss": 7.4979, "step": 453000 }, { "epoch": 1.73, "learning_rate": 2.1248408677084443e-05, "loss": 7.4752, "step": 453500 }, { "epoch": 1.73, "learning_rate": 2.121670901741199e-05, "loss": 7.4942, "step": 454000 }, { "epoch": 1.73, "learning_rate": 2.1185009357739536e-05, "loss": 7.4794, "step": 454500 }, { "epoch": 1.73, "learning_rate": 2.1153309698067083e-05, "loss": 7.5003, "step": 455000 }, { "epoch": 1.73, "learning_rate": 2.112161003839463e-05, "loss": 7.4842, "step": 455500 }, { "epoch": 1.73, "learning_rate": 2.1089910378722173e-05, "loss": 7.49, "step": 456000 }, { "epoch": 1.74, "learning_rate": 2.105821071904972e-05, "loss": 7.4955, "step": 456500 }, { "epoch": 1.74, "learning_rate": 2.102651105937727e-05, "loss": 7.4922, "step": 457000 }, { "epoch": 1.74, "learning_rate": 2.0994811399704813e-05, "loss": 7.4992, "step": 457500 }, { "epoch": 1.74, "learning_rate": 2.096311174003236e-05, "loss": 7.4878, "step": 458000 }, { "epoch": 1.74, "learning_rate": 2.0931412080359907e-05, "loss": 7.4955, "step": 458500 }, { "epoch": 1.75, "learning_rate": 2.0899712420687453e-05, "loss": 7.4969, "step": 459000 }, { "epoch": 1.75, "learning_rate": 2.0868012761015e-05, "loss": 7.5065, "step": 459500 }, { "epoch": 1.75, "learning_rate": 2.0836313101342543e-05, "loss": 7.4951, "step": 460000 }, { "epoch": 1.75, "learning_rate": 2.080461344167009e-05, "loss": 7.485, "step": 460500 }, { "epoch": 1.75, "learning_rate": 2.0772913781997637e-05, "loss": 7.4797, "step": 461000 }, { "epoch": 1.76, "learning_rate": 2.0741214122325183e-05, "loss": 7.495, "step": 461500 }, { "epoch": 1.76, "learning_rate": 2.070951446265273e-05, "loss": 7.4678, "step": 462000 }, { "epoch": 1.76, "learning_rate": 2.0677814802980274e-05, "loss": 7.5105, "step": 462500 }, { "epoch": 1.76, "learning_rate": 2.0646115143307824e-05, "loss": 7.4975, "step": 463000 }, { "epoch": 1.76, "learning_rate": 2.061441548363537e-05, "loss": 7.483, "step": 463500 }, { "epoch": 1.77, "learning_rate": 2.0582715823962914e-05, "loss": 7.4876, "step": 464000 }, { "epoch": 1.77, "learning_rate": 2.0551016164290464e-05, "loss": 7.5046, "step": 464500 }, { "epoch": 1.77, "learning_rate": 2.0519316504618007e-05, "loss": 7.4868, "step": 465000 }, { "epoch": 1.77, "learning_rate": 2.0487616844945554e-05, "loss": 7.4981, "step": 465500 }, { "epoch": 1.77, "learning_rate": 2.04559171852731e-05, "loss": 7.4838, "step": 466000 }, { "epoch": 1.77, "learning_rate": 2.0424217525600647e-05, "loss": 7.4986, "step": 466500 }, { "epoch": 1.78, "learning_rate": 2.0392517865928194e-05, "loss": 7.4767, "step": 467000 }, { "epoch": 1.78, "learning_rate": 2.0360818206255737e-05, "loss": 7.4845, "step": 467500 }, { "epoch": 1.78, "learning_rate": 2.0329118546583284e-05, "loss": 7.4986, "step": 468000 }, { "epoch": 1.78, "learning_rate": 2.029741888691083e-05, "loss": 7.4984, "step": 468500 }, { "epoch": 1.78, "learning_rate": 2.0265719227238377e-05, "loss": 7.4851, "step": 469000 }, { "epoch": 1.79, "learning_rate": 2.0234019567565924e-05, "loss": 7.4876, "step": 469500 }, { "epoch": 1.79, "learning_rate": 2.0202319907893467e-05, "loss": 7.4906, "step": 470000 }, { "epoch": 1.79, "learning_rate": 2.0170620248221017e-05, "loss": 7.4916, "step": 470500 }, { "epoch": 1.79, "learning_rate": 2.0138920588548564e-05, "loss": 7.4783, "step": 471000 }, { "epoch": 1.79, "learning_rate": 2.0107220928876107e-05, "loss": 7.492, "step": 471500 }, { "epoch": 1.8, "learning_rate": 2.0075521269203654e-05, "loss": 7.4747, "step": 472000 }, { "epoch": 1.8, "learning_rate": 2.00438216095312e-05, "loss": 7.4935, "step": 472500 }, { "epoch": 1.8, "learning_rate": 2.0012121949858748e-05, "loss": 7.4952, "step": 473000 }, { "epoch": 1.8, "learning_rate": 1.9980422290186294e-05, "loss": 7.4861, "step": 473500 }, { "epoch": 1.8, "learning_rate": 1.9948722630513838e-05, "loss": 7.4757, "step": 474000 }, { "epoch": 1.8, "learning_rate": 1.9917022970841388e-05, "loss": 7.4784, "step": 474500 }, { "epoch": 1.81, "learning_rate": 1.988532331116893e-05, "loss": 7.5014, "step": 475000 }, { "epoch": 1.81, "learning_rate": 1.9853623651496478e-05, "loss": 7.4779, "step": 475500 }, { "epoch": 1.81, "learning_rate": 1.9821923991824024e-05, "loss": 7.4747, "step": 476000 }, { "epoch": 1.81, "learning_rate": 1.979022433215157e-05, "loss": 7.4924, "step": 476500 }, { "epoch": 1.81, "learning_rate": 1.9758524672479118e-05, "loss": 7.4931, "step": 477000 }, { "epoch": 1.82, "learning_rate": 1.9726825012806664e-05, "loss": 7.5003, "step": 477500 }, { "epoch": 1.82, "learning_rate": 1.9695125353134208e-05, "loss": 7.4917, "step": 478000 }, { "epoch": 1.82, "learning_rate": 1.9663425693461758e-05, "loss": 7.485, "step": 478500 }, { "epoch": 1.82, "learning_rate": 1.96317260337893e-05, "loss": 7.4869, "step": 479000 }, { "epoch": 1.82, "learning_rate": 1.9600026374116848e-05, "loss": 7.5071, "step": 479500 }, { "epoch": 1.83, "learning_rate": 1.9568326714444395e-05, "loss": 7.4769, "step": 480000 }, { "epoch": 1.83, "learning_rate": 1.953662705477194e-05, "loss": 7.4922, "step": 480500 }, { "epoch": 1.83, "learning_rate": 1.9504927395099488e-05, "loss": 7.4746, "step": 481000 }, { "epoch": 1.83, "learning_rate": 1.947322773542703e-05, "loss": 7.4716, "step": 481500 }, { "epoch": 1.83, "learning_rate": 1.944152807575458e-05, "loss": 7.4843, "step": 482000 }, { "epoch": 1.84, "learning_rate": 1.9409828416082128e-05, "loss": 7.4858, "step": 482500 }, { "epoch": 1.84, "learning_rate": 1.937812875640967e-05, "loss": 7.4732, "step": 483000 }, { "epoch": 1.84, "learning_rate": 1.9346429096737218e-05, "loss": 7.4873, "step": 483500 }, { "epoch": 1.84, "learning_rate": 1.9314729437064765e-05, "loss": 7.4883, "step": 484000 }, { "epoch": 1.84, "learning_rate": 1.928302977739231e-05, "loss": 7.4897, "step": 484500 }, { "epoch": 1.84, "learning_rate": 1.9251330117719858e-05, "loss": 7.4871, "step": 485000 }, { "epoch": 1.85, "learning_rate": 1.92196304580474e-05, "loss": 7.4895, "step": 485500 }, { "epoch": 1.85, "learning_rate": 1.918793079837495e-05, "loss": 7.5044, "step": 486000 }, { "epoch": 1.85, "learning_rate": 1.9156231138702495e-05, "loss": 7.4946, "step": 486500 }, { "epoch": 1.85, "learning_rate": 1.912453147903004e-05, "loss": 7.4789, "step": 487000 }, { "epoch": 1.85, "learning_rate": 1.909283181935759e-05, "loss": 7.48, "step": 487500 }, { "epoch": 1.86, "learning_rate": 1.9061132159685135e-05, "loss": 7.4858, "step": 488000 }, { "epoch": 1.86, "learning_rate": 1.9029432500012682e-05, "loss": 7.5018, "step": 488500 }, { "epoch": 1.86, "learning_rate": 1.8997732840340225e-05, "loss": 7.4792, "step": 489000 }, { "epoch": 1.86, "learning_rate": 1.8966033180667772e-05, "loss": 7.4872, "step": 489500 }, { "epoch": 1.86, "learning_rate": 1.8934333520995322e-05, "loss": 7.4953, "step": 490000 }, { "epoch": 1.87, "learning_rate": 1.8902633861322865e-05, "loss": 7.4795, "step": 490500 }, { "epoch": 1.87, "learning_rate": 1.8870934201650412e-05, "loss": 7.4935, "step": 491000 }, { "epoch": 1.87, "learning_rate": 1.883923454197796e-05, "loss": 7.4772, "step": 491500 }, { "epoch": 1.87, "learning_rate": 1.8807534882305505e-05, "loss": 7.4874, "step": 492000 }, { "epoch": 1.87, "learning_rate": 1.8775835222633052e-05, "loss": 7.4835, "step": 492500 }, { "epoch": 1.88, "learning_rate": 1.8744135562960595e-05, "loss": 7.4861, "step": 493000 }, { "epoch": 1.88, "learning_rate": 1.8712435903288142e-05, "loss": 7.49, "step": 493500 }, { "epoch": 1.88, "learning_rate": 1.868073624361569e-05, "loss": 7.4842, "step": 494000 }, { "epoch": 1.88, "learning_rate": 1.8649036583943235e-05, "loss": 7.4898, "step": 494500 }, { "epoch": 1.88, "learning_rate": 1.8617336924270782e-05, "loss": 7.5066, "step": 495000 }, { "epoch": 1.88, "learning_rate": 1.858563726459833e-05, "loss": 7.5039, "step": 495500 }, { "epoch": 1.89, "learning_rate": 1.8553937604925876e-05, "loss": 7.4901, "step": 496000 }, { "epoch": 1.89, "learning_rate": 1.8522237945253422e-05, "loss": 7.4683, "step": 496500 }, { "epoch": 1.89, "learning_rate": 1.8490538285580966e-05, "loss": 7.4971, "step": 497000 }, { "epoch": 1.89, "learning_rate": 1.8458838625908516e-05, "loss": 7.4787, "step": 497500 }, { "epoch": 1.89, "learning_rate": 1.842713896623606e-05, "loss": 7.4902, "step": 498000 }, { "epoch": 1.9, "learning_rate": 1.8395439306563606e-05, "loss": 7.4826, "step": 498500 }, { "epoch": 1.9, "learning_rate": 1.8363739646891152e-05, "loss": 7.4757, "step": 499000 }, { "epoch": 1.9, "learning_rate": 1.83320399872187e-05, "loss": 7.4912, "step": 499500 }, { "epoch": 1.9, "learning_rate": 1.8300340327546246e-05, "loss": 7.4803, "step": 500000 }, { "epoch": 1.9, "learning_rate": 1.826864066787379e-05, "loss": 7.4893, "step": 500500 }, { "epoch": 1.91, "learning_rate": 1.8236941008201336e-05, "loss": 7.4932, "step": 501000 }, { "epoch": 1.91, "learning_rate": 1.8205241348528883e-05, "loss": 7.4942, "step": 501500 }, { "epoch": 1.91, "learning_rate": 1.817354168885643e-05, "loss": 7.4846, "step": 502000 }, { "epoch": 1.91, "learning_rate": 1.8141842029183976e-05, "loss": 7.501, "step": 502500 }, { "epoch": 1.91, "learning_rate": 1.811014236951152e-05, "loss": 7.4974, "step": 503000 }, { "epoch": 1.92, "learning_rate": 1.807844270983907e-05, "loss": 7.485, "step": 503500 }, { "epoch": 1.92, "learning_rate": 1.8046743050166616e-05, "loss": 7.4922, "step": 504000 }, { "epoch": 1.92, "learning_rate": 1.801504339049416e-05, "loss": 7.4862, "step": 504500 }, { "epoch": 1.92, "learning_rate": 1.7983343730821706e-05, "loss": 7.4751, "step": 505000 }, { "epoch": 1.92, "learning_rate": 1.7951644071149253e-05, "loss": 7.4911, "step": 505500 }, { "epoch": 1.92, "learning_rate": 1.79199444114768e-05, "loss": 7.4885, "step": 506000 }, { "epoch": 1.93, "learning_rate": 1.7888244751804346e-05, "loss": 7.4996, "step": 506500 }, { "epoch": 1.93, "learning_rate": 1.785654509213189e-05, "loss": 7.4842, "step": 507000 }, { "epoch": 1.93, "learning_rate": 1.782484543245944e-05, "loss": 7.482, "step": 507500 }, { "epoch": 1.93, "learning_rate": 1.7793145772786983e-05, "loss": 7.5013, "step": 508000 }, { "epoch": 1.93, "learning_rate": 1.776144611311453e-05, "loss": 7.4943, "step": 508500 }, { "epoch": 1.94, "learning_rate": 1.7729746453442076e-05, "loss": 7.4914, "step": 509000 }, { "epoch": 1.94, "learning_rate": 1.7698046793769623e-05, "loss": 7.4987, "step": 509500 }, { "epoch": 1.94, "learning_rate": 1.766634713409717e-05, "loss": 7.4847, "step": 510000 }, { "epoch": 1.94, "learning_rate": 1.7634647474424716e-05, "loss": 7.4991, "step": 510500 }, { "epoch": 1.94, "learning_rate": 1.760294781475226e-05, "loss": 7.5086, "step": 511000 }, { "epoch": 1.95, "learning_rate": 1.757124815507981e-05, "loss": 7.4931, "step": 511500 }, { "epoch": 1.95, "learning_rate": 1.7539548495407353e-05, "loss": 7.4879, "step": 512000 }, { "epoch": 1.95, "learning_rate": 1.75078488357349e-05, "loss": 7.4858, "step": 512500 }, { "epoch": 1.95, "learning_rate": 1.7476149176062447e-05, "loss": 7.4899, "step": 513000 }, { "epoch": 1.95, "learning_rate": 1.7444449516389993e-05, "loss": 7.4803, "step": 513500 }, { "epoch": 1.96, "learning_rate": 1.741274985671754e-05, "loss": 7.4752, "step": 514000 }, { "epoch": 1.96, "learning_rate": 1.7381050197045083e-05, "loss": 7.4754, "step": 514500 }, { "epoch": 1.96, "learning_rate": 1.7349350537372633e-05, "loss": 7.4866, "step": 515000 }, { "epoch": 1.96, "learning_rate": 1.7317650877700177e-05, "loss": 7.4782, "step": 515500 }, { "epoch": 1.96, "learning_rate": 1.7285951218027723e-05, "loss": 7.4994, "step": 516000 }, { "epoch": 1.96, "learning_rate": 1.725425155835527e-05, "loss": 7.4918, "step": 516500 }, { "epoch": 1.97, "learning_rate": 1.7222551898682817e-05, "loss": 7.4669, "step": 517000 }, { "epoch": 1.97, "learning_rate": 1.7190852239010364e-05, "loss": 7.4877, "step": 517500 }, { "epoch": 1.97, "learning_rate": 1.715915257933791e-05, "loss": 7.4764, "step": 518000 }, { "epoch": 1.97, "learning_rate": 1.7127452919665454e-05, "loss": 7.4781, "step": 518500 }, { "epoch": 1.97, "learning_rate": 1.7095753259993004e-05, "loss": 7.486, "step": 519000 }, { "epoch": 1.98, "learning_rate": 1.7064053600320547e-05, "loss": 7.4951, "step": 519500 }, { "epoch": 1.98, "learning_rate": 1.7032353940648094e-05, "loss": 7.4638, "step": 520000 }, { "epoch": 1.98, "learning_rate": 1.700065428097564e-05, "loss": 7.4869, "step": 520500 }, { "epoch": 1.98, "learning_rate": 1.6968954621303187e-05, "loss": 7.4846, "step": 521000 }, { "epoch": 1.98, "learning_rate": 1.6937254961630734e-05, "loss": 7.4765, "step": 521500 }, { "epoch": 1.99, "learning_rate": 1.6905555301958277e-05, "loss": 7.5108, "step": 522000 }, { "epoch": 1.99, "learning_rate": 1.6873855642285824e-05, "loss": 7.4799, "step": 522500 }, { "epoch": 1.99, "learning_rate": 1.6842155982613374e-05, "loss": 7.4881, "step": 523000 }, { "epoch": 1.99, "learning_rate": 1.6810456322940917e-05, "loss": 7.4677, "step": 523500 }, { "epoch": 1.99, "learning_rate": 1.6778756663268464e-05, "loss": 7.4847, "step": 524000 }, { "epoch": 2.0, "learning_rate": 1.674705700359601e-05, "loss": 7.4958, "step": 524500 }, { "epoch": 2.0, "learning_rate": 1.6715357343923557e-05, "loss": 7.4829, "step": 525000 }, { "epoch": 2.0, "learning_rate": 1.6683657684251104e-05, "loss": 7.4905, "step": 525500 }, { "epoch": 2.0, "learning_rate": 1.6651958024578647e-05, "loss": 7.4893, "step": 526000 }, { "epoch": 2.0, "learning_rate": 1.6620258364906194e-05, "loss": 7.4913, "step": 526500 }, { "epoch": 2.0, "learning_rate": 1.658855870523374e-05, "loss": 7.495, "step": 527000 }, { "epoch": 2.01, "learning_rate": 1.6556859045561287e-05, "loss": 7.4846, "step": 527500 }, { "epoch": 2.01, "learning_rate": 1.6525159385888834e-05, "loss": 7.4968, "step": 528000 }, { "epoch": 2.01, "learning_rate": 1.649345972621638e-05, "loss": 7.4936, "step": 528500 }, { "epoch": 2.01, "learning_rate": 1.6461760066543928e-05, "loss": 7.488, "step": 529000 }, { "epoch": 2.01, "learning_rate": 1.643006040687147e-05, "loss": 7.4935, "step": 529500 }, { "epoch": 2.02, "learning_rate": 1.6398360747199018e-05, "loss": 7.485, "step": 530000 }, { "epoch": 2.02, "learning_rate": 1.6366661087526568e-05, "loss": 7.4838, "step": 530500 }, { "epoch": 2.02, "learning_rate": 1.633496142785411e-05, "loss": 7.5033, "step": 531000 }, { "epoch": 2.02, "learning_rate": 1.6303261768181658e-05, "loss": 7.5058, "step": 531500 }, { "epoch": 2.02, "learning_rate": 1.6271562108509204e-05, "loss": 7.4735, "step": 532000 }, { "epoch": 2.03, "learning_rate": 1.623986244883675e-05, "loss": 7.5094, "step": 532500 }, { "epoch": 2.03, "learning_rate": 1.6208162789164298e-05, "loss": 7.468, "step": 533000 }, { "epoch": 2.03, "learning_rate": 1.617646312949184e-05, "loss": 7.4916, "step": 533500 }, { "epoch": 2.03, "learning_rate": 1.6144763469819388e-05, "loss": 7.4981, "step": 534000 }, { "epoch": 2.03, "learning_rate": 1.6113063810146935e-05, "loss": 7.4882, "step": 534500 }, { "epoch": 2.04, "learning_rate": 1.608136415047448e-05, "loss": 7.488, "step": 535000 }, { "epoch": 2.04, "learning_rate": 1.6049664490802028e-05, "loss": 7.4897, "step": 535500 }, { "epoch": 2.04, "learning_rate": 1.601796483112957e-05, "loss": 7.4763, "step": 536000 }, { "epoch": 2.04, "learning_rate": 1.598626517145712e-05, "loss": 7.4969, "step": 536500 }, { "epoch": 2.04, "learning_rate": 1.5954565511784668e-05, "loss": 7.4741, "step": 537000 }, { "epoch": 2.04, "learning_rate": 1.592286585211221e-05, "loss": 7.4865, "step": 537500 }, { "epoch": 2.05, "learning_rate": 1.5891166192439758e-05, "loss": 7.4717, "step": 538000 }, { "epoch": 2.05, "learning_rate": 1.5859466532767305e-05, "loss": 7.496, "step": 538500 }, { "epoch": 2.05, "learning_rate": 1.582776687309485e-05, "loss": 7.4946, "step": 539000 }, { "epoch": 2.05, "learning_rate": 1.5796067213422398e-05, "loss": 7.4919, "step": 539500 }, { "epoch": 2.05, "learning_rate": 1.576436755374994e-05, "loss": 7.4883, "step": 540000 }, { "epoch": 2.06, "learning_rate": 1.573266789407749e-05, "loss": 7.4776, "step": 540500 }, { "epoch": 2.06, "learning_rate": 1.5700968234405035e-05, "loss": 7.4971, "step": 541000 }, { "epoch": 2.06, "learning_rate": 1.566926857473258e-05, "loss": 7.4961, "step": 541500 }, { "epoch": 2.06, "learning_rate": 1.563756891506013e-05, "loss": 7.4906, "step": 542000 }, { "epoch": 2.06, "learning_rate": 1.5605869255387675e-05, "loss": 7.5022, "step": 542500 }, { "epoch": 2.07, "learning_rate": 1.5574169595715222e-05, "loss": 7.4833, "step": 543000 }, { "epoch": 2.07, "learning_rate": 1.5542469936042765e-05, "loss": 7.4884, "step": 543500 }, { "epoch": 2.07, "learning_rate": 1.5510770276370315e-05, "loss": 7.4796, "step": 544000 }, { "epoch": 2.07, "learning_rate": 1.5479070616697862e-05, "loss": 7.4927, "step": 544500 }, { "epoch": 2.07, "learning_rate": 1.5447370957025405e-05, "loss": 7.478, "step": 545000 }, { "epoch": 2.08, "learning_rate": 1.5415671297352952e-05, "loss": 7.4795, "step": 545500 }, { "epoch": 2.08, "learning_rate": 1.53839716376805e-05, "loss": 7.4672, "step": 546000 }, { "epoch": 2.08, "learning_rate": 1.5352271978008045e-05, "loss": 7.4848, "step": 546500 }, { "epoch": 2.08, "learning_rate": 1.5320572318335592e-05, "loss": 7.4975, "step": 547000 }, { "epoch": 2.08, "learning_rate": 1.5288872658663135e-05, "loss": 7.486, "step": 547500 }, { "epoch": 2.08, "learning_rate": 1.5257172998990685e-05, "loss": 7.5042, "step": 548000 }, { "epoch": 2.09, "learning_rate": 1.522547333931823e-05, "loss": 7.4824, "step": 548500 }, { "epoch": 2.09, "learning_rate": 1.5193773679645775e-05, "loss": 7.4905, "step": 549000 }, { "epoch": 2.09, "learning_rate": 1.5162074019973322e-05, "loss": 7.4875, "step": 549500 }, { "epoch": 2.09, "learning_rate": 1.5130374360300869e-05, "loss": 7.4709, "step": 550000 }, { "epoch": 2.09, "learning_rate": 1.5098674700628416e-05, "loss": 7.484, "step": 550500 }, { "epoch": 2.1, "learning_rate": 1.506697504095596e-05, "loss": 7.4856, "step": 551000 }, { "epoch": 2.1, "learning_rate": 1.5035275381283506e-05, "loss": 7.4796, "step": 551500 }, { "epoch": 2.1, "learning_rate": 1.5003575721611054e-05, "loss": 7.4841, "step": 552000 }, { "epoch": 2.1, "learning_rate": 1.49718760619386e-05, "loss": 7.4878, "step": 552500 }, { "epoch": 2.1, "learning_rate": 1.4940176402266146e-05, "loss": 7.5088, "step": 553000 }, { "epoch": 2.11, "learning_rate": 1.490847674259369e-05, "loss": 7.4853, "step": 553500 }, { "epoch": 2.11, "learning_rate": 1.4876777082921239e-05, "loss": 7.4974, "step": 554000 }, { "epoch": 2.11, "learning_rate": 1.4845077423248784e-05, "loss": 7.4835, "step": 554500 }, { "epoch": 2.11, "learning_rate": 1.481337776357633e-05, "loss": 7.4858, "step": 555000 }, { "epoch": 2.11, "learning_rate": 1.4781678103903876e-05, "loss": 7.4657, "step": 555500 }, { "epoch": 2.12, "learning_rate": 1.4749978444231424e-05, "loss": 7.487, "step": 556000 }, { "epoch": 2.12, "learning_rate": 1.471827878455897e-05, "loss": 7.4915, "step": 556500 }, { "epoch": 2.12, "learning_rate": 1.4686579124886516e-05, "loss": 7.4858, "step": 557000 }, { "epoch": 2.12, "learning_rate": 1.4654879465214061e-05, "loss": 7.4788, "step": 557500 }, { "epoch": 2.12, "learning_rate": 1.462317980554161e-05, "loss": 7.4807, "step": 558000 }, { "epoch": 2.12, "learning_rate": 1.4591480145869154e-05, "loss": 7.4823, "step": 558500 }, { "epoch": 2.13, "learning_rate": 1.4559780486196701e-05, "loss": 7.4924, "step": 559000 }, { "epoch": 2.13, "learning_rate": 1.4528080826524248e-05, "loss": 7.4928, "step": 559500 }, { "epoch": 2.13, "learning_rate": 1.4496381166851794e-05, "loss": 7.4887, "step": 560000 }, { "epoch": 2.13, "learning_rate": 1.446468150717934e-05, "loss": 7.487, "step": 560500 }, { "epoch": 2.13, "learning_rate": 1.4432981847506884e-05, "loss": 7.4855, "step": 561000 }, { "epoch": 2.14, "learning_rate": 1.4401282187834433e-05, "loss": 7.4952, "step": 561500 }, { "epoch": 2.14, "learning_rate": 1.436958252816198e-05, "loss": 7.494, "step": 562000 }, { "epoch": 2.14, "learning_rate": 1.4337882868489525e-05, "loss": 7.4929, "step": 562500 }, { "epoch": 2.14, "learning_rate": 1.430618320881707e-05, "loss": 7.4806, "step": 563000 }, { "epoch": 2.14, "learning_rate": 1.4274483549144618e-05, "loss": 7.4835, "step": 563500 }, { "epoch": 2.15, "learning_rate": 1.4242783889472163e-05, "loss": 7.4992, "step": 564000 }, { "epoch": 2.15, "learning_rate": 1.421108422979971e-05, "loss": 7.4782, "step": 564500 }, { "epoch": 2.15, "learning_rate": 1.4179384570127255e-05, "loss": 7.4948, "step": 565000 }, { "epoch": 2.15, "learning_rate": 1.4147684910454803e-05, "loss": 7.4824, "step": 565500 }, { "epoch": 2.15, "learning_rate": 1.4115985250782348e-05, "loss": 7.4814, "step": 566000 }, { "epoch": 2.15, "learning_rate": 1.4084285591109895e-05, "loss": 7.4857, "step": 566500 }, { "epoch": 2.16, "learning_rate": 1.405258593143744e-05, "loss": 7.4909, "step": 567000 }, { "epoch": 2.16, "learning_rate": 1.4020886271764988e-05, "loss": 7.4852, "step": 567500 }, { "epoch": 2.16, "learning_rate": 1.3989186612092533e-05, "loss": 7.4942, "step": 568000 }, { "epoch": 2.16, "learning_rate": 1.3957486952420078e-05, "loss": 7.4776, "step": 568500 }, { "epoch": 2.16, "learning_rate": 1.3925787292747625e-05, "loss": 7.4865, "step": 569000 }, { "epoch": 2.17, "learning_rate": 1.3894087633075173e-05, "loss": 7.479, "step": 569500 }, { "epoch": 2.17, "learning_rate": 1.3862387973402718e-05, "loss": 7.4894, "step": 570000 }, { "epoch": 2.17, "learning_rate": 1.3830688313730263e-05, "loss": 7.4859, "step": 570500 }, { "epoch": 2.17, "learning_rate": 1.379898865405781e-05, "loss": 7.5113, "step": 571000 }, { "epoch": 2.17, "learning_rate": 1.3767288994385358e-05, "loss": 7.4906, "step": 571500 }, { "epoch": 2.18, "learning_rate": 1.3735589334712903e-05, "loss": 7.4839, "step": 572000 }, { "epoch": 2.18, "learning_rate": 1.3703889675040448e-05, "loss": 7.4801, "step": 572500 }, { "epoch": 2.18, "learning_rate": 1.3672190015367995e-05, "loss": 7.4786, "step": 573000 }, { "epoch": 2.18, "learning_rate": 1.3640490355695542e-05, "loss": 7.4568, "step": 573500 }, { "epoch": 2.18, "learning_rate": 1.3608790696023089e-05, "loss": 7.4822, "step": 574000 }, { "epoch": 2.19, "learning_rate": 1.3577091036350634e-05, "loss": 7.5005, "step": 574500 }, { "epoch": 2.19, "learning_rate": 1.3545391376678179e-05, "loss": 7.4834, "step": 575000 }, { "epoch": 2.19, "learning_rate": 1.3513691717005727e-05, "loss": 7.4912, "step": 575500 }, { "epoch": 2.19, "learning_rate": 1.3481992057333274e-05, "loss": 7.489, "step": 576000 }, { "epoch": 2.19, "learning_rate": 1.3450292397660819e-05, "loss": 7.4785, "step": 576500 }, { "epoch": 2.19, "learning_rate": 1.3418592737988367e-05, "loss": 7.4709, "step": 577000 }, { "epoch": 2.2, "learning_rate": 1.3386893078315912e-05, "loss": 7.5052, "step": 577500 }, { "epoch": 2.2, "learning_rate": 1.3355193418643457e-05, "loss": 7.4848, "step": 578000 }, { "epoch": 2.2, "learning_rate": 1.3323493758971004e-05, "loss": 7.4859, "step": 578500 }, { "epoch": 2.2, "learning_rate": 1.3291794099298552e-05, "loss": 7.4929, "step": 579000 }, { "epoch": 2.2, "learning_rate": 1.3260094439626097e-05, "loss": 7.4868, "step": 579500 }, { "epoch": 2.21, "learning_rate": 1.3228394779953642e-05, "loss": 7.4788, "step": 580000 }, { "epoch": 2.21, "learning_rate": 1.3196695120281189e-05, "loss": 7.4951, "step": 580500 }, { "epoch": 2.21, "learning_rate": 1.3164995460608737e-05, "loss": 7.4951, "step": 581000 }, { "epoch": 2.21, "learning_rate": 1.3133295800936282e-05, "loss": 7.4861, "step": 581500 }, { "epoch": 2.21, "learning_rate": 1.3101596141263827e-05, "loss": 7.4999, "step": 582000 }, { "epoch": 2.22, "learning_rate": 1.3069896481591372e-05, "loss": 7.4914, "step": 582500 }, { "epoch": 2.22, "learning_rate": 1.303819682191892e-05, "loss": 7.49, "step": 583000 }, { "epoch": 2.22, "learning_rate": 1.3006497162246468e-05, "loss": 7.4875, "step": 583500 }, { "epoch": 2.22, "learning_rate": 1.2974797502574013e-05, "loss": 7.5048, "step": 584000 }, { "epoch": 2.22, "learning_rate": 1.2943097842901558e-05, "loss": 7.4962, "step": 584500 }, { "epoch": 2.23, "learning_rate": 1.2911398183229106e-05, "loss": 7.4774, "step": 585000 }, { "epoch": 2.23, "learning_rate": 1.2879698523556653e-05, "loss": 7.4836, "step": 585500 }, { "epoch": 2.23, "learning_rate": 1.2847998863884198e-05, "loss": 7.502, "step": 586000 }, { "epoch": 2.23, "learning_rate": 1.2816299204211743e-05, "loss": 7.4826, "step": 586500 }, { "epoch": 2.23, "learning_rate": 1.2784599544539291e-05, "loss": 7.4802, "step": 587000 }, { "epoch": 2.23, "learning_rate": 1.2752899884866836e-05, "loss": 7.4735, "step": 587500 }, { "epoch": 2.24, "learning_rate": 1.2721200225194383e-05, "loss": 7.4813, "step": 588000 }, { "epoch": 2.24, "learning_rate": 1.2689500565521928e-05, "loss": 7.4912, "step": 588500 }, { "epoch": 2.24, "learning_rate": 1.2657800905849476e-05, "loss": 7.483, "step": 589000 }, { "epoch": 2.24, "learning_rate": 1.2626101246177021e-05, "loss": 7.4972, "step": 589500 }, { "epoch": 2.24, "learning_rate": 1.2594401586504568e-05, "loss": 7.4872, "step": 590000 }, { "epoch": 2.25, "learning_rate": 1.2562701926832113e-05, "loss": 7.4886, "step": 590500 }, { "epoch": 2.25, "learning_rate": 1.2531002267159661e-05, "loss": 7.4957, "step": 591000 }, { "epoch": 2.25, "learning_rate": 1.2499302607487206e-05, "loss": 7.495, "step": 591500 }, { "epoch": 2.25, "learning_rate": 1.2467602947814753e-05, "loss": 7.472, "step": 592000 }, { "epoch": 2.25, "learning_rate": 1.24359032881423e-05, "loss": 7.485, "step": 592500 }, { "epoch": 2.26, "learning_rate": 1.2404203628469846e-05, "loss": 7.4938, "step": 593000 }, { "epoch": 2.26, "learning_rate": 1.2372503968797391e-05, "loss": 7.4972, "step": 593500 }, { "epoch": 2.26, "learning_rate": 1.2340804309124938e-05, "loss": 7.485, "step": 594000 }, { "epoch": 2.26, "learning_rate": 1.2309104649452483e-05, "loss": 7.4855, "step": 594500 }, { "epoch": 2.26, "learning_rate": 1.2277404989780032e-05, "loss": 7.488, "step": 595000 }, { "epoch": 2.27, "learning_rate": 1.2245705330107577e-05, "loss": 7.4869, "step": 595500 }, { "epoch": 2.27, "learning_rate": 1.2214005670435123e-05, "loss": 7.4852, "step": 596000 }, { "epoch": 2.27, "learning_rate": 1.2182306010762668e-05, "loss": 7.4858, "step": 596500 }, { "epoch": 2.27, "learning_rate": 1.2150606351090215e-05, "loss": 7.4779, "step": 597000 }, { "epoch": 2.27, "learning_rate": 1.2118906691417762e-05, "loss": 7.4977, "step": 597500 }, { "epoch": 2.27, "learning_rate": 1.2087207031745308e-05, "loss": 7.4837, "step": 598000 }, { "epoch": 2.28, "learning_rate": 1.2055507372072853e-05, "loss": 7.4878, "step": 598500 }, { "epoch": 2.28, "learning_rate": 1.20238077124004e-05, "loss": 7.5044, "step": 599000 }, { "epoch": 2.28, "learning_rate": 1.1992108052727947e-05, "loss": 7.4806, "step": 599500 }, { "epoch": 2.28, "learning_rate": 1.1960408393055494e-05, "loss": 7.4817, "step": 600000 }, { "epoch": 2.28, "learning_rate": 1.1928708733383039e-05, "loss": 7.4771, "step": 600500 }, { "epoch": 2.29, "learning_rate": 1.1897009073710585e-05, "loss": 7.4823, "step": 601000 }, { "epoch": 2.29, "learning_rate": 1.186530941403813e-05, "loss": 7.4791, "step": 601500 }, { "epoch": 2.29, "learning_rate": 1.1833609754365679e-05, "loss": 7.5043, "step": 602000 }, { "epoch": 2.29, "learning_rate": 1.1801910094693224e-05, "loss": 7.493, "step": 602500 }, { "epoch": 2.29, "learning_rate": 1.177021043502077e-05, "loss": 7.4888, "step": 603000 }, { "epoch": 2.3, "learning_rate": 1.1738510775348315e-05, "loss": 7.4986, "step": 603500 }, { "epoch": 2.3, "learning_rate": 1.1706811115675862e-05, "loss": 7.4765, "step": 604000 }, { "epoch": 2.3, "learning_rate": 1.1675111456003409e-05, "loss": 7.4994, "step": 604500 }, { "epoch": 2.3, "learning_rate": 1.1643411796330955e-05, "loss": 7.4915, "step": 605000 }, { "epoch": 2.3, "learning_rate": 1.16117121366585e-05, "loss": 7.4944, "step": 605500 }, { "epoch": 2.31, "learning_rate": 1.1580012476986047e-05, "loss": 7.4793, "step": 606000 }, { "epoch": 2.31, "learning_rate": 1.1548312817313594e-05, "loss": 7.4996, "step": 606500 }, { "epoch": 2.31, "learning_rate": 1.151661315764114e-05, "loss": 7.4913, "step": 607000 }, { "epoch": 2.31, "learning_rate": 1.1484913497968687e-05, "loss": 7.4759, "step": 607500 }, { "epoch": 2.31, "learning_rate": 1.1453213838296232e-05, "loss": 7.4876, "step": 608000 }, { "epoch": 2.31, "learning_rate": 1.1421514178623779e-05, "loss": 7.4641, "step": 608500 }, { "epoch": 2.32, "learning_rate": 1.1389814518951326e-05, "loss": 7.4921, "step": 609000 }, { "epoch": 2.32, "learning_rate": 1.1358114859278872e-05, "loss": 7.4813, "step": 609500 }, { "epoch": 2.32, "learning_rate": 1.1326415199606417e-05, "loss": 7.5045, "step": 610000 }, { "epoch": 2.32, "learning_rate": 1.1294715539933964e-05, "loss": 7.4985, "step": 610500 }, { "epoch": 2.32, "learning_rate": 1.1263015880261509e-05, "loss": 7.4815, "step": 611000 }, { "epoch": 2.33, "learning_rate": 1.1231316220589058e-05, "loss": 7.4762, "step": 611500 }, { "epoch": 2.33, "learning_rate": 1.1199616560916603e-05, "loss": 7.4714, "step": 612000 }, { "epoch": 2.33, "learning_rate": 1.116791690124415e-05, "loss": 7.5055, "step": 612500 }, { "epoch": 2.33, "learning_rate": 1.1136217241571694e-05, "loss": 7.5009, "step": 613000 }, { "epoch": 2.33, "learning_rate": 1.1104517581899241e-05, "loss": 7.4863, "step": 613500 }, { "epoch": 2.34, "learning_rate": 1.1072817922226788e-05, "loss": 7.4795, "step": 614000 }, { "epoch": 2.34, "learning_rate": 1.1041118262554334e-05, "loss": 7.4836, "step": 614500 }, { "epoch": 2.34, "learning_rate": 1.100941860288188e-05, "loss": 7.5069, "step": 615000 }, { "epoch": 2.34, "learning_rate": 1.0977718943209426e-05, "loss": 7.495, "step": 615500 }, { "epoch": 2.34, "learning_rate": 1.0946019283536973e-05, "loss": 7.4857, "step": 616000 }, { "epoch": 2.35, "learning_rate": 1.091431962386452e-05, "loss": 7.4908, "step": 616500 }, { "epoch": 2.35, "learning_rate": 1.0882619964192065e-05, "loss": 7.482, "step": 617000 }, { "epoch": 2.35, "learning_rate": 1.0850920304519611e-05, "loss": 7.4732, "step": 617500 }, { "epoch": 2.35, "learning_rate": 1.0819220644847156e-05, "loss": 7.4855, "step": 618000 }, { "epoch": 2.35, "learning_rate": 1.0787520985174705e-05, "loss": 7.4918, "step": 618500 }, { "epoch": 2.35, "learning_rate": 1.075582132550225e-05, "loss": 7.4965, "step": 619000 }, { "epoch": 2.36, "learning_rate": 1.0724121665829796e-05, "loss": 7.4942, "step": 619500 }, { "epoch": 2.36, "learning_rate": 1.0692422006157341e-05, "loss": 7.4977, "step": 620000 }, { "epoch": 2.36, "learning_rate": 1.0660722346484888e-05, "loss": 7.4783, "step": 620500 }, { "epoch": 2.36, "learning_rate": 1.0629022686812435e-05, "loss": 7.4989, "step": 621000 }, { "epoch": 2.36, "learning_rate": 1.0597323027139981e-05, "loss": 7.4745, "step": 621500 }, { "epoch": 2.37, "learning_rate": 1.0565623367467526e-05, "loss": 7.4794, "step": 622000 }, { "epoch": 2.37, "learning_rate": 1.0533923707795073e-05, "loss": 7.4847, "step": 622500 }, { "epoch": 2.37, "learning_rate": 1.050222404812262e-05, "loss": 7.4982, "step": 623000 }, { "epoch": 2.37, "learning_rate": 1.0470524388450167e-05, "loss": 7.503, "step": 623500 }, { "epoch": 2.37, "learning_rate": 1.0438824728777713e-05, "loss": 7.4928, "step": 624000 }, { "epoch": 2.38, "learning_rate": 1.0407125069105258e-05, "loss": 7.479, "step": 624500 }, { "epoch": 2.38, "learning_rate": 1.0375425409432805e-05, "loss": 7.4936, "step": 625000 }, { "epoch": 2.38, "learning_rate": 1.0343725749760352e-05, "loss": 7.4868, "step": 625500 }, { "epoch": 2.38, "learning_rate": 1.0312026090087898e-05, "loss": 7.4826, "step": 626000 }, { "epoch": 2.38, "learning_rate": 1.0280326430415443e-05, "loss": 7.4834, "step": 626500 }, { "epoch": 2.39, "learning_rate": 1.024862677074299e-05, "loss": 7.4945, "step": 627000 }, { "epoch": 2.39, "learning_rate": 1.0216927111070535e-05, "loss": 7.4836, "step": 627500 }, { "epoch": 2.39, "learning_rate": 1.0185227451398084e-05, "loss": 7.4862, "step": 628000 }, { "epoch": 2.39, "learning_rate": 1.0153527791725629e-05, "loss": 7.4984, "step": 628500 }, { "epoch": 2.39, "learning_rate": 1.0121828132053175e-05, "loss": 7.4911, "step": 629000 }, { "epoch": 2.39, "learning_rate": 1.009012847238072e-05, "loss": 7.4969, "step": 629500 }, { "epoch": 2.4, "learning_rate": 1.0058428812708267e-05, "loss": 7.482, "step": 630000 }, { "epoch": 2.4, "learning_rate": 1.0026729153035814e-05, "loss": 7.4814, "step": 630500 }, { "epoch": 2.4, "learning_rate": 9.99502949336336e-06, "loss": 7.498, "step": 631000 }, { "epoch": 2.4, "learning_rate": 9.963329833690905e-06, "loss": 7.4941, "step": 631500 }, { "epoch": 2.4, "learning_rate": 9.931630174018452e-06, "loss": 7.5052, "step": 632000 }, { "epoch": 2.41, "learning_rate": 9.899930514345999e-06, "loss": 7.4824, "step": 632500 }, { "epoch": 2.41, "learning_rate": 9.868230854673545e-06, "loss": 7.4937, "step": 633000 }, { "epoch": 2.41, "learning_rate": 9.83653119500109e-06, "loss": 7.5007, "step": 633500 }, { "epoch": 2.41, "learning_rate": 9.804831535328637e-06, "loss": 7.4825, "step": 634000 }, { "epoch": 2.41, "learning_rate": 9.773131875656182e-06, "loss": 7.4892, "step": 634500 }, { "epoch": 2.42, "learning_rate": 9.74143221598373e-06, "loss": 7.4957, "step": 635000 }, { "epoch": 2.42, "learning_rate": 9.709732556311276e-06, "loss": 7.4823, "step": 635500 }, { "epoch": 2.42, "learning_rate": 9.678032896638822e-06, "loss": 7.486, "step": 636000 }, { "epoch": 2.42, "learning_rate": 9.646333236966367e-06, "loss": 7.4864, "step": 636500 }, { "epoch": 2.42, "learning_rate": 9.614633577293914e-06, "loss": 7.4859, "step": 637000 }, { "epoch": 2.43, "learning_rate": 9.58293391762146e-06, "loss": 7.4927, "step": 637500 }, { "epoch": 2.43, "learning_rate": 9.551234257949007e-06, "loss": 7.4718, "step": 638000 }, { "epoch": 2.43, "learning_rate": 9.519534598276552e-06, "loss": 7.4904, "step": 638500 }, { "epoch": 2.43, "learning_rate": 9.4878349386041e-06, "loss": 7.4748, "step": 639000 }, { "epoch": 2.43, "learning_rate": 9.456135278931646e-06, "loss": 7.5012, "step": 639500 }, { "epoch": 2.43, "learning_rate": 9.424435619259193e-06, "loss": 7.4837, "step": 640000 }, { "epoch": 2.44, "learning_rate": 9.39273595958674e-06, "loss": 7.4662, "step": 640500 }, { "epoch": 2.44, "learning_rate": 9.361036299914284e-06, "loss": 7.4819, "step": 641000 }, { "epoch": 2.44, "learning_rate": 9.329336640241831e-06, "loss": 7.4823, "step": 641500 }, { "epoch": 2.44, "learning_rate": 9.297636980569378e-06, "loss": 7.4812, "step": 642000 }, { "epoch": 2.44, "learning_rate": 9.265937320896924e-06, "loss": 7.4908, "step": 642500 }, { "epoch": 2.45, "learning_rate": 9.23423766122447e-06, "loss": 7.487, "step": 643000 }, { "epoch": 2.45, "learning_rate": 9.202538001552016e-06, "loss": 7.4879, "step": 643500 }, { "epoch": 2.45, "learning_rate": 9.170838341879561e-06, "loss": 7.4821, "step": 644000 }, { "epoch": 2.45, "learning_rate": 9.13913868220711e-06, "loss": 7.5029, "step": 644500 }, { "epoch": 2.45, "learning_rate": 9.107439022534655e-06, "loss": 7.4918, "step": 645000 }, { "epoch": 2.46, "learning_rate": 9.075739362862201e-06, "loss": 7.4751, "step": 645500 }, { "epoch": 2.46, "learning_rate": 9.044039703189746e-06, "loss": 7.4986, "step": 646000 }, { "epoch": 2.46, "learning_rate": 9.012340043517293e-06, "loss": 7.495, "step": 646500 }, { "epoch": 2.46, "learning_rate": 8.98064038384484e-06, "loss": 7.4812, "step": 647000 }, { "epoch": 2.46, "learning_rate": 8.948940724172386e-06, "loss": 7.489, "step": 647500 }, { "epoch": 2.46, "learning_rate": 8.917241064499931e-06, "loss": 7.4895, "step": 648000 }, { "epoch": 2.47, "learning_rate": 8.885541404827478e-06, "loss": 7.4865, "step": 648500 }, { "epoch": 2.47, "learning_rate": 8.853841745155025e-06, "loss": 7.4895, "step": 649000 }, { "epoch": 2.47, "learning_rate": 8.822142085482571e-06, "loss": 7.4767, "step": 649500 }, { "epoch": 2.47, "learning_rate": 8.790442425810116e-06, "loss": 7.4838, "step": 650000 }, { "epoch": 2.47, "learning_rate": 8.758742766137663e-06, "loss": 7.4996, "step": 650500 }, { "epoch": 2.48, "learning_rate": 8.727043106465208e-06, "loss": 7.4857, "step": 651000 }, { "epoch": 2.48, "learning_rate": 8.695343446792757e-06, "loss": 7.4907, "step": 651500 }, { "epoch": 2.48, "learning_rate": 8.663643787120302e-06, "loss": 7.4794, "step": 652000 }, { "epoch": 2.48, "learning_rate": 8.631944127447848e-06, "loss": 7.4902, "step": 652500 }, { "epoch": 2.48, "learning_rate": 8.600244467775393e-06, "loss": 7.4896, "step": 653000 }, { "epoch": 2.49, "learning_rate": 8.56854480810294e-06, "loss": 7.482, "step": 653500 }, { "epoch": 2.49, "learning_rate": 8.536845148430487e-06, "loss": 7.4846, "step": 654000 }, { "epoch": 2.49, "learning_rate": 8.505145488758033e-06, "loss": 7.4953, "step": 654500 }, { "epoch": 2.49, "learning_rate": 8.473445829085578e-06, "loss": 7.4905, "step": 655000 }, { "epoch": 2.49, "learning_rate": 8.441746169413125e-06, "loss": 7.4971, "step": 655500 }, { "epoch": 2.5, "learning_rate": 8.410046509740672e-06, "loss": 7.4926, "step": 656000 }, { "epoch": 2.5, "learning_rate": 8.378346850068219e-06, "loss": 7.4864, "step": 656500 }, { "epoch": 2.5, "learning_rate": 8.346647190395765e-06, "loss": 7.4681, "step": 657000 }, { "epoch": 2.5, "learning_rate": 8.31494753072331e-06, "loss": 7.4832, "step": 657500 }, { "epoch": 2.5, "learning_rate": 8.283247871050857e-06, "loss": 7.4953, "step": 658000 }, { "epoch": 2.5, "learning_rate": 8.251548211378404e-06, "loss": 7.4854, "step": 658500 }, { "epoch": 2.51, "learning_rate": 8.21984855170595e-06, "loss": 7.4948, "step": 659000 }, { "epoch": 2.51, "learning_rate": 8.188148892033495e-06, "loss": 7.4983, "step": 659500 }, { "epoch": 2.51, "learning_rate": 8.156449232361042e-06, "loss": 7.4777, "step": 660000 }, { "epoch": 2.51, "learning_rate": 8.124749572688587e-06, "loss": 7.4798, "step": 660500 }, { "epoch": 2.51, "learning_rate": 8.093049913016136e-06, "loss": 7.4792, "step": 661000 }, { "epoch": 2.52, "learning_rate": 8.06135025334368e-06, "loss": 7.4759, "step": 661500 }, { "epoch": 2.52, "learning_rate": 8.029650593671227e-06, "loss": 7.4847, "step": 662000 }, { "epoch": 2.52, "learning_rate": 7.997950933998772e-06, "loss": 7.4837, "step": 662500 }, { "epoch": 2.52, "learning_rate": 7.966251274326319e-06, "loss": 7.4778, "step": 663000 }, { "epoch": 2.52, "learning_rate": 7.934551614653866e-06, "loss": 7.4882, "step": 663500 }, { "epoch": 2.53, "learning_rate": 7.902851954981412e-06, "loss": 7.4778, "step": 664000 }, { "epoch": 2.53, "learning_rate": 7.871152295308957e-06, "loss": 7.4866, "step": 664500 }, { "epoch": 2.53, "learning_rate": 7.839452635636504e-06, "loss": 7.4954, "step": 665000 }, { "epoch": 2.53, "learning_rate": 7.80775297596405e-06, "loss": 7.4884, "step": 665500 }, { "epoch": 2.53, "learning_rate": 7.776053316291597e-06, "loss": 7.4751, "step": 666000 }, { "epoch": 2.54, "learning_rate": 7.744353656619142e-06, "loss": 7.4712, "step": 666500 }, { "epoch": 2.54, "learning_rate": 7.71265399694669e-06, "loss": 7.4923, "step": 667000 }, { "epoch": 2.54, "learning_rate": 7.680954337274234e-06, "loss": 7.4964, "step": 667500 }, { "epoch": 2.54, "learning_rate": 7.649254677601783e-06, "loss": 7.499, "step": 668000 }, { "epoch": 2.54, "learning_rate": 7.617555017929328e-06, "loss": 7.4754, "step": 668500 }, { "epoch": 2.54, "learning_rate": 7.585855358256874e-06, "loss": 7.4836, "step": 669000 }, { "epoch": 2.55, "learning_rate": 7.554155698584419e-06, "loss": 7.488, "step": 669500 }, { "epoch": 2.55, "learning_rate": 7.522456038911967e-06, "loss": 7.4923, "step": 670000 }, { "epoch": 2.55, "learning_rate": 7.490756379239512e-06, "loss": 7.4793, "step": 670500 }, { "epoch": 2.55, "learning_rate": 7.4590567195670594e-06, "loss": 7.4939, "step": 671000 }, { "epoch": 2.55, "learning_rate": 7.427357059894606e-06, "loss": 7.487, "step": 671500 }, { "epoch": 2.56, "learning_rate": 7.395657400222151e-06, "loss": 7.4868, "step": 672000 }, { "epoch": 2.56, "learning_rate": 7.363957740549699e-06, "loss": 7.4842, "step": 672500 }, { "epoch": 2.56, "learning_rate": 7.332258080877244e-06, "loss": 7.4822, "step": 673000 }, { "epoch": 2.56, "learning_rate": 7.300558421204791e-06, "loss": 7.4852, "step": 673500 }, { "epoch": 2.56, "learning_rate": 7.268858761532336e-06, "loss": 7.4891, "step": 674000 }, { "epoch": 2.57, "learning_rate": 7.237159101859883e-06, "loss": 7.4841, "step": 674500 }, { "epoch": 2.57, "learning_rate": 7.205459442187429e-06, "loss": 7.4945, "step": 675000 }, { "epoch": 2.57, "learning_rate": 7.1737597825149755e-06, "loss": 7.4866, "step": 675500 }, { "epoch": 2.57, "learning_rate": 7.142060122842521e-06, "loss": 7.488, "step": 676000 }, { "epoch": 2.57, "learning_rate": 7.110360463170068e-06, "loss": 7.4688, "step": 676500 }, { "epoch": 2.58, "learning_rate": 7.078660803497614e-06, "loss": 7.4729, "step": 677000 }, { "epoch": 2.58, "learning_rate": 7.046961143825161e-06, "loss": 7.479, "step": 677500 }, { "epoch": 2.58, "learning_rate": 7.0152614841527065e-06, "loss": 7.4898, "step": 678000 }, { "epoch": 2.58, "learning_rate": 6.983561824480253e-06, "loss": 7.5005, "step": 678500 }, { "epoch": 2.58, "learning_rate": 6.951862164807798e-06, "loss": 7.4854, "step": 679000 }, { "epoch": 2.58, "learning_rate": 6.920162505135346e-06, "loss": 7.4771, "step": 679500 }, { "epoch": 2.59, "learning_rate": 6.888462845462891e-06, "loss": 7.4996, "step": 680000 }, { "epoch": 2.59, "learning_rate": 6.856763185790438e-06, "loss": 7.5023, "step": 680500 }, { "epoch": 2.59, "learning_rate": 6.825063526117983e-06, "loss": 7.4873, "step": 681000 }, { "epoch": 2.59, "learning_rate": 6.79336386644553e-06, "loss": 7.4781, "step": 681500 }, { "epoch": 2.59, "learning_rate": 6.761664206773076e-06, "loss": 7.4824, "step": 682000 }, { "epoch": 2.6, "learning_rate": 6.729964547100623e-06, "loss": 7.4792, "step": 682500 }, { "epoch": 2.6, "learning_rate": 6.6982648874281685e-06, "loss": 7.4833, "step": 683000 }, { "epoch": 2.6, "learning_rate": 6.666565227755715e-06, "loss": 7.4883, "step": 683500 }, { "epoch": 2.6, "learning_rate": 6.634865568083261e-06, "loss": 7.4876, "step": 684000 }, { "epoch": 2.6, "learning_rate": 6.603165908410808e-06, "loss": 7.4966, "step": 684500 }, { "epoch": 2.61, "learning_rate": 6.571466248738354e-06, "loss": 7.494, "step": 685000 }, { "epoch": 2.61, "learning_rate": 6.5397665890659e-06, "loss": 7.4769, "step": 685500 }, { "epoch": 2.61, "learning_rate": 6.508066929393445e-06, "loss": 7.4921, "step": 686000 }, { "epoch": 2.61, "learning_rate": 6.476367269720993e-06, "loss": 7.4946, "step": 686500 }, { "epoch": 2.61, "learning_rate": 6.444667610048538e-06, "loss": 7.4877, "step": 687000 }, { "epoch": 2.62, "learning_rate": 6.4129679503760854e-06, "loss": 7.4814, "step": 687500 }, { "epoch": 2.62, "learning_rate": 6.381268290703632e-06, "loss": 7.4903, "step": 688000 }, { "epoch": 2.62, "learning_rate": 6.349568631031177e-06, "loss": 7.4983, "step": 688500 }, { "epoch": 2.62, "learning_rate": 6.317868971358725e-06, "loss": 7.4839, "step": 689000 }, { "epoch": 2.62, "learning_rate": 6.28616931168627e-06, "loss": 7.5113, "step": 689500 }, { "epoch": 2.62, "learning_rate": 6.254469652013817e-06, "loss": 7.4889, "step": 690000 }, { "epoch": 2.63, "learning_rate": 6.222769992341362e-06, "loss": 7.4904, "step": 690500 }, { "epoch": 2.63, "learning_rate": 6.191070332668909e-06, "loss": 7.4978, "step": 691000 }, { "epoch": 2.63, "learning_rate": 6.159370672996455e-06, "loss": 7.4832, "step": 691500 }, { "epoch": 2.63, "learning_rate": 6.127671013324001e-06, "loss": 7.485, "step": 692000 }, { "epoch": 2.63, "learning_rate": 6.095971353651547e-06, "loss": 7.4716, "step": 692500 }, { "epoch": 2.64, "learning_rate": 6.064271693979093e-06, "loss": 7.5012, "step": 693000 }, { "epoch": 2.64, "learning_rate": 6.03257203430664e-06, "loss": 7.4798, "step": 693500 }, { "epoch": 2.64, "learning_rate": 6.000872374634186e-06, "loss": 7.4806, "step": 694000 }, { "epoch": 2.64, "learning_rate": 5.9691727149617325e-06, "loss": 7.486, "step": 694500 }, { "epoch": 2.64, "learning_rate": 5.937473055289278e-06, "loss": 7.4836, "step": 695000 }, { "epoch": 2.65, "learning_rate": 5.905773395616825e-06, "loss": 7.4876, "step": 695500 }, { "epoch": 2.65, "learning_rate": 5.874073735944372e-06, "loss": 7.468, "step": 696000 }, { "epoch": 2.65, "learning_rate": 5.842374076271918e-06, "loss": 7.4871, "step": 696500 }, { "epoch": 2.65, "learning_rate": 5.810674416599464e-06, "loss": 7.493, "step": 697000 }, { "epoch": 2.65, "learning_rate": 5.77897475692701e-06, "loss": 7.4734, "step": 697500 }, { "epoch": 2.66, "learning_rate": 5.747275097254556e-06, "loss": 7.4704, "step": 698000 }, { "epoch": 2.66, "learning_rate": 5.715575437582103e-06, "loss": 7.4867, "step": 698500 }, { "epoch": 2.66, "learning_rate": 5.683875777909649e-06, "loss": 7.4739, "step": 699000 }, { "epoch": 2.66, "learning_rate": 5.652176118237195e-06, "loss": 7.4763, "step": 699500 }, { "epoch": 2.66, "learning_rate": 5.620476458564741e-06, "loss": 7.483, "step": 700000 }, { "epoch": 2.66, "learning_rate": 5.588776798892288e-06, "loss": 7.4921, "step": 700500 }, { "epoch": 2.67, "learning_rate": 5.557077139219834e-06, "loss": 7.4833, "step": 701000 }, { "epoch": 2.67, "learning_rate": 5.52537747954738e-06, "loss": 7.497, "step": 701500 }, { "epoch": 2.67, "learning_rate": 5.493677819874926e-06, "loss": 7.4893, "step": 702000 }, { "epoch": 2.67, "learning_rate": 5.461978160202472e-06, "loss": 7.4757, "step": 702500 }, { "epoch": 2.67, "learning_rate": 5.430278500530019e-06, "loss": 7.4707, "step": 703000 }, { "epoch": 2.68, "learning_rate": 5.398578840857565e-06, "loss": 7.4749, "step": 703500 }, { "epoch": 2.68, "learning_rate": 5.366879181185111e-06, "loss": 7.4802, "step": 704000 }, { "epoch": 2.68, "learning_rate": 5.335179521512657e-06, "loss": 7.4991, "step": 704500 }, { "epoch": 2.68, "learning_rate": 5.303479861840203e-06, "loss": 7.4917, "step": 705000 }, { "epoch": 2.68, "learning_rate": 5.27178020216775e-06, "loss": 7.4783, "step": 705500 }, { "epoch": 2.69, "learning_rate": 5.240080542495296e-06, "loss": 7.4963, "step": 706000 }, { "epoch": 2.69, "learning_rate": 5.208380882822842e-06, "loss": 7.4929, "step": 706500 }, { "epoch": 2.69, "learning_rate": 5.176681223150388e-06, "loss": 7.4678, "step": 707000 }, { "epoch": 2.69, "learning_rate": 5.144981563477935e-06, "loss": 7.5026, "step": 707500 }, { "epoch": 2.69, "learning_rate": 5.113281903805481e-06, "loss": 7.4733, "step": 708000 }, { "epoch": 2.7, "learning_rate": 5.081582244133027e-06, "loss": 7.4956, "step": 708500 }, { "epoch": 2.7, "learning_rate": 5.049882584460573e-06, "loss": 7.4927, "step": 709000 }, { "epoch": 2.7, "learning_rate": 5.018182924788119e-06, "loss": 7.4927, "step": 709500 }, { "epoch": 2.7, "learning_rate": 4.986483265115666e-06, "loss": 7.4829, "step": 710000 }, { "epoch": 2.7, "learning_rate": 4.954783605443212e-06, "loss": 7.4863, "step": 710500 }, { "epoch": 2.7, "learning_rate": 4.9230839457707585e-06, "loss": 7.4888, "step": 711000 }, { "epoch": 2.71, "learning_rate": 4.891384286098305e-06, "loss": 7.4863, "step": 711500 }, { "epoch": 2.71, "learning_rate": 4.859684626425851e-06, "loss": 7.4832, "step": 712000 }, { "epoch": 2.71, "learning_rate": 4.827984966753398e-06, "loss": 7.4808, "step": 712500 }, { "epoch": 2.71, "learning_rate": 4.796285307080944e-06, "loss": 7.4834, "step": 713000 }, { "epoch": 2.71, "learning_rate": 4.7645856474084895e-06, "loss": 7.4914, "step": 713500 }, { "epoch": 2.72, "learning_rate": 4.732885987736036e-06, "loss": 7.4729, "step": 714000 }, { "epoch": 2.72, "learning_rate": 4.701186328063582e-06, "loss": 7.4726, "step": 714500 }, { "epoch": 2.72, "learning_rate": 4.669486668391129e-06, "loss": 7.4795, "step": 715000 }, { "epoch": 2.72, "learning_rate": 4.637787008718675e-06, "loss": 7.4856, "step": 715500 }, { "epoch": 2.72, "learning_rate": 4.606087349046221e-06, "loss": 7.4836, "step": 716000 }, { "epoch": 2.73, "learning_rate": 4.574387689373767e-06, "loss": 7.4901, "step": 716500 }, { "epoch": 2.73, "learning_rate": 4.542688029701313e-06, "loss": 7.4942, "step": 717000 }, { "epoch": 2.73, "learning_rate": 4.51098837002886e-06, "loss": 7.4943, "step": 717500 }, { "epoch": 2.73, "learning_rate": 4.479288710356406e-06, "loss": 7.4813, "step": 718000 }, { "epoch": 2.73, "learning_rate": 4.447589050683952e-06, "loss": 7.4915, "step": 718500 }, { "epoch": 2.74, "learning_rate": 4.415889391011498e-06, "loss": 7.4744, "step": 719000 }, { "epoch": 2.74, "learning_rate": 4.384189731339045e-06, "loss": 7.4867, "step": 719500 }, { "epoch": 2.74, "learning_rate": 4.352490071666591e-06, "loss": 7.4755, "step": 720000 }, { "epoch": 2.74, "learning_rate": 4.3207904119941366e-06, "loss": 7.4858, "step": 720500 }, { "epoch": 2.74, "learning_rate": 4.289090752321683e-06, "loss": 7.4846, "step": 721000 }, { "epoch": 2.74, "learning_rate": 4.257391092649229e-06, "loss": 7.4888, "step": 721500 }, { "epoch": 2.75, "learning_rate": 4.225691432976776e-06, "loss": 7.4856, "step": 722000 }, { "epoch": 2.75, "learning_rate": 4.193991773304322e-06, "loss": 7.5096, "step": 722500 }, { "epoch": 2.75, "learning_rate": 4.162292113631868e-06, "loss": 7.4782, "step": 723000 }, { "epoch": 2.75, "learning_rate": 4.130592453959414e-06, "loss": 7.4777, "step": 723500 }, { "epoch": 2.75, "learning_rate": 4.09889279428696e-06, "loss": 7.4838, "step": 724000 }, { "epoch": 2.76, "learning_rate": 4.067193134614507e-06, "loss": 7.4883, "step": 724500 }, { "epoch": 2.76, "learning_rate": 4.035493474942053e-06, "loss": 7.4824, "step": 725000 }, { "epoch": 2.76, "learning_rate": 4.003793815269599e-06, "loss": 7.4915, "step": 725500 }, { "epoch": 2.76, "learning_rate": 3.972094155597145e-06, "loss": 7.4854, "step": 726000 }, { "epoch": 2.76, "learning_rate": 3.940394495924692e-06, "loss": 7.4829, "step": 726500 }, { "epoch": 2.77, "learning_rate": 3.908694836252238e-06, "loss": 7.4947, "step": 727000 }, { "epoch": 2.77, "learning_rate": 3.8769951765797845e-06, "loss": 7.49, "step": 727500 }, { "epoch": 2.77, "learning_rate": 3.845295516907331e-06, "loss": 7.4696, "step": 728000 }, { "epoch": 2.77, "learning_rate": 3.813595857234877e-06, "loss": 7.4842, "step": 728500 }, { "epoch": 2.77, "learning_rate": 3.7818961975624233e-06, "loss": 7.5, "step": 729000 }, { "epoch": 2.77, "learning_rate": 3.7501965378899696e-06, "loss": 7.4932, "step": 729500 }, { "epoch": 2.78, "learning_rate": 3.718496878217516e-06, "loss": 7.4867, "step": 730000 }, { "epoch": 2.78, "learning_rate": 3.686797218545062e-06, "loss": 7.4942, "step": 730500 }, { "epoch": 2.78, "learning_rate": 3.6550975588726085e-06, "loss": 7.4817, "step": 731000 }, { "epoch": 2.78, "learning_rate": 3.6233978992001543e-06, "loss": 7.4858, "step": 731500 }, { "epoch": 2.78, "learning_rate": 3.5916982395277006e-06, "loss": 7.484, "step": 732000 }, { "epoch": 2.79, "learning_rate": 3.559998579855247e-06, "loss": 7.4813, "step": 732500 }, { "epoch": 2.79, "learning_rate": 3.528298920182793e-06, "loss": 7.4867, "step": 733000 }, { "epoch": 2.79, "learning_rate": 3.4965992605103394e-06, "loss": 7.5001, "step": 733500 }, { "epoch": 2.79, "learning_rate": 3.4648996008378857e-06, "loss": 7.4765, "step": 734000 }, { "epoch": 2.79, "learning_rate": 3.433199941165432e-06, "loss": 7.4821, "step": 734500 }, { "epoch": 2.8, "learning_rate": 3.401500281492978e-06, "loss": 7.4928, "step": 735000 }, { "epoch": 2.8, "learning_rate": 3.369800621820524e-06, "loss": 7.4892, "step": 735500 }, { "epoch": 2.8, "learning_rate": 3.3381009621480704e-06, "loss": 7.492, "step": 736000 }, { "epoch": 2.8, "learning_rate": 3.3064013024756167e-06, "loss": 7.502, "step": 736500 }, { "epoch": 2.8, "learning_rate": 3.274701642803163e-06, "loss": 7.4886, "step": 737000 }, { "epoch": 2.81, "learning_rate": 3.2430019831307093e-06, "loss": 7.478, "step": 737500 }, { "epoch": 2.81, "learning_rate": 3.2113023234582555e-06, "loss": 7.4988, "step": 738000 }, { "epoch": 2.81, "learning_rate": 3.1796026637858014e-06, "loss": 7.5066, "step": 738500 }, { "epoch": 2.81, "learning_rate": 3.1479030041133477e-06, "loss": 7.4705, "step": 739000 }, { "epoch": 2.81, "learning_rate": 3.1162033444408944e-06, "loss": 7.4903, "step": 739500 }, { "epoch": 2.81, "learning_rate": 3.0845036847684407e-06, "loss": 7.4835, "step": 740000 }, { "epoch": 2.82, "learning_rate": 3.052804025095987e-06, "loss": 7.5007, "step": 740500 }, { "epoch": 2.82, "learning_rate": 3.0211043654235332e-06, "loss": 7.4836, "step": 741000 }, { "epoch": 2.82, "learning_rate": 2.989404705751079e-06, "loss": 7.4821, "step": 741500 }, { "epoch": 2.82, "learning_rate": 2.9577050460786254e-06, "loss": 7.4831, "step": 742000 }, { "epoch": 2.82, "learning_rate": 2.9260053864061716e-06, "loss": 7.4771, "step": 742500 }, { "epoch": 2.83, "learning_rate": 2.894305726733718e-06, "loss": 7.4929, "step": 743000 }, { "epoch": 2.83, "learning_rate": 2.862606067061264e-06, "loss": 7.4817, "step": 743500 }, { "epoch": 2.83, "learning_rate": 2.8309064073888105e-06, "loss": 7.4738, "step": 744000 }, { "epoch": 2.83, "learning_rate": 2.7992067477163568e-06, "loss": 7.4834, "step": 744500 }, { "epoch": 2.83, "learning_rate": 2.7675070880439026e-06, "loss": 7.4874, "step": 745000 }, { "epoch": 2.84, "learning_rate": 2.735807428371449e-06, "loss": 7.4922, "step": 745500 }, { "epoch": 2.84, "learning_rate": 2.704107768698995e-06, "loss": 7.4841, "step": 746000 }, { "epoch": 2.84, "learning_rate": 2.6724081090265415e-06, "loss": 7.4811, "step": 746500 }, { "epoch": 2.84, "learning_rate": 2.6407084493540877e-06, "loss": 7.5024, "step": 747000 }, { "epoch": 2.84, "learning_rate": 2.6090087896816345e-06, "loss": 7.4905, "step": 747500 }, { "epoch": 2.85, "learning_rate": 2.5773091300091803e-06, "loss": 7.4966, "step": 748000 }, { "epoch": 2.85, "learning_rate": 2.5456094703367266e-06, "loss": 7.4824, "step": 748500 }, { "epoch": 2.85, "learning_rate": 2.513909810664273e-06, "loss": 7.4926, "step": 749000 }, { "epoch": 2.85, "learning_rate": 2.482210150991819e-06, "loss": 7.5065, "step": 749500 }, { "epoch": 2.85, "learning_rate": 2.4505104913193654e-06, "loss": 7.489, "step": 750000 }, { "epoch": 2.85, "learning_rate": 2.4188108316469117e-06, "loss": 7.4916, "step": 750500 }, { "epoch": 2.86, "learning_rate": 2.387111171974458e-06, "loss": 7.4795, "step": 751000 }, { "epoch": 2.86, "learning_rate": 2.355411512302004e-06, "loss": 7.4899, "step": 751500 }, { "epoch": 2.86, "learning_rate": 2.32371185262955e-06, "loss": 7.4849, "step": 752000 }, { "epoch": 2.86, "learning_rate": 2.2920121929570964e-06, "loss": 7.4987, "step": 752500 }, { "epoch": 2.86, "learning_rate": 2.2603125332846427e-06, "loss": 7.492, "step": 753000 }, { "epoch": 2.87, "learning_rate": 2.228612873612189e-06, "loss": 7.4875, "step": 753500 }, { "epoch": 2.87, "learning_rate": 2.1969132139397353e-06, "loss": 7.4801, "step": 754000 }, { "epoch": 2.87, "learning_rate": 2.1652135542672815e-06, "loss": 7.4815, "step": 754500 }, { "epoch": 2.87, "learning_rate": 2.1335138945948274e-06, "loss": 7.4784, "step": 755000 }, { "epoch": 2.87, "learning_rate": 2.101814234922374e-06, "loss": 7.49, "step": 755500 }, { "epoch": 2.88, "learning_rate": 2.0701145752499204e-06, "loss": 7.4881, "step": 756000 }, { "epoch": 2.88, "learning_rate": 2.0384149155774667e-06, "loss": 7.5105, "step": 756500 }, { "epoch": 2.88, "learning_rate": 2.006715255905013e-06, "loss": 7.474, "step": 757000 }, { "epoch": 2.88, "learning_rate": 1.9750155962325592e-06, "loss": 7.4827, "step": 757500 }, { "epoch": 2.88, "learning_rate": 1.943315936560105e-06, "loss": 7.4983, "step": 758000 }, { "epoch": 2.89, "learning_rate": 1.9116162768876514e-06, "loss": 7.4819, "step": 758500 }, { "epoch": 2.89, "learning_rate": 1.8799166172151976e-06, "loss": 7.4906, "step": 759000 }, { "epoch": 2.89, "learning_rate": 1.848216957542744e-06, "loss": 7.4869, "step": 759500 }, { "epoch": 2.89, "learning_rate": 1.8165172978702902e-06, "loss": 7.5035, "step": 760000 }, { "epoch": 2.89, "learning_rate": 1.7848176381978363e-06, "loss": 7.4905, "step": 760500 }, { "epoch": 2.89, "learning_rate": 1.7531179785253826e-06, "loss": 7.5053, "step": 761000 }, { "epoch": 2.9, "learning_rate": 1.7214183188529288e-06, "loss": 7.4814, "step": 761500 }, { "epoch": 2.9, "learning_rate": 1.689718659180475e-06, "loss": 7.4942, "step": 762000 }, { "epoch": 2.9, "learning_rate": 1.6580189995080212e-06, "loss": 7.4941, "step": 762500 }, { "epoch": 2.9, "learning_rate": 1.6263193398355675e-06, "loss": 7.4806, "step": 763000 }, { "epoch": 2.9, "learning_rate": 1.594619680163114e-06, "loss": 7.488, "step": 763500 }, { "epoch": 2.91, "learning_rate": 1.5629200204906602e-06, "loss": 7.4943, "step": 764000 }, { "epoch": 2.91, "learning_rate": 1.5312203608182063e-06, "loss": 7.4968, "step": 764500 }, { "epoch": 2.91, "learning_rate": 1.4995207011457526e-06, "loss": 7.4926, "step": 765000 }, { "epoch": 2.91, "learning_rate": 1.4678210414732989e-06, "loss": 7.4796, "step": 765500 }, { "epoch": 2.91, "learning_rate": 1.4361213818008451e-06, "loss": 7.4879, "step": 766000 }, { "epoch": 2.92, "learning_rate": 1.4044217221283914e-06, "loss": 7.4766, "step": 766500 }, { "epoch": 2.92, "learning_rate": 1.3727220624559375e-06, "loss": 7.4799, "step": 767000 }, { "epoch": 2.92, "learning_rate": 1.3410224027834838e-06, "loss": 7.4736, "step": 767500 }, { "epoch": 2.92, "learning_rate": 1.30932274311103e-06, "loss": 7.4757, "step": 768000 }, { "epoch": 2.92, "learning_rate": 1.2776230834385761e-06, "loss": 7.4907, "step": 768500 }, { "epoch": 2.93, "learning_rate": 1.2459234237661224e-06, "loss": 7.4913, "step": 769000 }, { "epoch": 2.93, "learning_rate": 1.214223764093669e-06, "loss": 7.4808, "step": 769500 }, { "epoch": 2.93, "learning_rate": 1.182524104421215e-06, "loss": 7.4841, "step": 770000 }, { "epoch": 2.93, "learning_rate": 1.1508244447487612e-06, "loss": 7.4796, "step": 770500 }, { "epoch": 2.93, "learning_rate": 1.1191247850763075e-06, "loss": 7.4991, "step": 771000 }, { "epoch": 2.93, "learning_rate": 1.0874251254038536e-06, "loss": 7.489, "step": 771500 }, { "epoch": 2.94, "learning_rate": 1.0557254657313999e-06, "loss": 7.4808, "step": 772000 }, { "epoch": 2.94, "learning_rate": 1.0240258060589462e-06, "loss": 7.4912, "step": 772500 }, { "epoch": 2.94, "learning_rate": 9.923261463864924e-07, "loss": 7.4826, "step": 773000 }, { "epoch": 2.94, "learning_rate": 9.606264867140387e-07, "loss": 7.4879, "step": 773500 }, { "epoch": 2.94, "learning_rate": 9.28926827041585e-07, "loss": 7.4882, "step": 774000 }, { "epoch": 2.95, "learning_rate": 8.972271673691312e-07, "loss": 7.5192, "step": 774500 }, { "epoch": 2.95, "learning_rate": 8.655275076966775e-07, "loss": 7.4925, "step": 775000 }, { "epoch": 2.95, "learning_rate": 8.338278480242236e-07, "loss": 7.4822, "step": 775500 }, { "epoch": 2.95, "learning_rate": 8.021281883517698e-07, "loss": 7.4826, "step": 776000 }, { "epoch": 2.95, "learning_rate": 7.704285286793162e-07, "loss": 7.5019, "step": 776500 }, { "epoch": 2.96, "learning_rate": 7.387288690068624e-07, "loss": 7.4893, "step": 777000 }, { "epoch": 2.96, "learning_rate": 7.070292093344086e-07, "loss": 7.4881, "step": 777500 }, { "epoch": 2.96, "learning_rate": 6.753295496619548e-07, "loss": 7.4814, "step": 778000 }, { "epoch": 2.96, "learning_rate": 6.436298899895011e-07, "loss": 7.4952, "step": 778500 }, { "epoch": 2.96, "learning_rate": 6.119302303170474e-07, "loss": 7.5016, "step": 779000 }, { "epoch": 2.97, "learning_rate": 5.802305706445936e-07, "loss": 7.4836, "step": 779500 }, { "epoch": 2.97, "learning_rate": 5.485309109721397e-07, "loss": 7.5027, "step": 780000 }, { "epoch": 2.97, "learning_rate": 5.168312512996861e-07, "loss": 7.5021, "step": 780500 }, { "epoch": 2.97, "learning_rate": 4.851315916272323e-07, "loss": 7.4949, "step": 781000 }, { "epoch": 2.97, "learning_rate": 4.534319319547785e-07, "loss": 7.4767, "step": 781500 }, { "epoch": 2.97, "learning_rate": 4.2173227228232475e-07, "loss": 7.4891, "step": 782000 }, { "epoch": 2.98, "learning_rate": 3.9003261260987103e-07, "loss": 7.4797, "step": 782500 }, { "epoch": 2.98, "learning_rate": 3.5833295293741726e-07, "loss": 7.502, "step": 783000 }, { "epoch": 2.98, "learning_rate": 3.266332932649635e-07, "loss": 7.4929, "step": 783500 }, { "epoch": 2.98, "learning_rate": 2.9493363359250977e-07, "loss": 7.4897, "step": 784000 }, { "epoch": 2.98, "learning_rate": 2.63233973920056e-07, "loss": 7.4833, "step": 784500 }, { "epoch": 2.99, "learning_rate": 2.3153431424760225e-07, "loss": 7.4747, "step": 785000 }, { "epoch": 2.99, "learning_rate": 1.9983465457514848e-07, "loss": 7.485, "step": 785500 }, { "epoch": 2.99, "learning_rate": 1.6813499490269473e-07, "loss": 7.4975, "step": 786000 }, { "epoch": 2.99, "learning_rate": 1.36435335230241e-07, "loss": 7.4765, "step": 786500 }, { "epoch": 2.99, "learning_rate": 1.0473567555778721e-07, "loss": 7.482, "step": 787000 }, { "epoch": 3.0, "learning_rate": 7.303601588533346e-08, "loss": 7.4931, "step": 787500 }, { "epoch": 3.0, "learning_rate": 4.13363562128797e-08, "loss": 7.4786, "step": 788000 }, { "epoch": 3.0, "learning_rate": 9.636696540425943e-09, "loss": 7.4738, "step": 788500 }, { "epoch": 3.0, "step": 788652, "total_flos": 1.3227171767511228e+19, "train_loss": 7.491424576953926, "train_runtime": 331955.1531, "train_samples_per_second": 19.006, "train_steps_per_second": 2.376 } ], "max_steps": 788652, "num_train_epochs": 3, "total_flos": 1.3227171767511228e+19, "trial_name": null, "trial_params": null }