diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9487 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 788652, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.996830034032755e-05, + "loss": 7.8831, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9936600680655096e-05, + "loss": 7.6494, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990490102098264e-05, + "loss": 7.6212, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987320136131019e-05, + "loss": 7.6134, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984150170163773e-05, + "loss": 7.5908, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980980204196528e-05, + "loss": 7.5855, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9778102382292826e-05, + "loss": 7.5888, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974640272262037e-05, + "loss": 7.5772, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971470306294792e-05, + "loss": 7.5693, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968300340327546e-05, + "loss": 7.5633, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965130374360301e-05, + "loss": 7.567, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 4.961960408393056e-05, + "loss": 7.5412, + "step": 6000 + }, + { + "epoch": 0.02, + "learning_rate": 4.95879044242581e-05, + "loss": 7.5584, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955620476458565e-05, + "loss": 7.5604, + "step": 7000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952450510491319e-05, + "loss": 7.568, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949280544524074e-05, + "loss": 7.5403, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946110578556829e-05, + "loss": 7.5384, + "step": 8500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9429406125895836e-05, + "loss": 7.552, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939770646622338e-05, + "loss": 7.553, + "step": 9500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936600680655093e-05, + "loss": 7.5243, + "step": 10000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933430714687847e-05, + "loss": 7.5329, + "step": 10500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930260748720602e-05, + "loss": 7.5257, + "step": 11000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9270907827533566e-05, + "loss": 7.5282, + "step": 11500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923920816786111e-05, + "loss": 7.5224, + "step": 12000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920750850818866e-05, + "loss": 7.5279, + "step": 12500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91758088485162e-05, + "loss": 7.5301, + "step": 13000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914410918884375e-05, + "loss": 7.5161, + "step": 13500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9112409529171297e-05, + "loss": 7.5328, + "step": 14000 + }, + { + "epoch": 0.06, + "learning_rate": 4.908070986949884e-05, + "loss": 7.5159, + "step": 14500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904901020982639e-05, + "loss": 7.505, + "step": 15000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901731055015393e-05, + "loss": 7.5269, + "step": 15500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898561089048148e-05, + "loss": 7.5221, + "step": 16000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895391123080903e-05, + "loss": 7.5086, + "step": 16500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892221157113657e-05, + "loss": 7.5176, + "step": 17000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889051191146412e-05, + "loss": 7.5047, + "step": 17500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885881225179167e-05, + "loss": 7.5244, + "step": 18000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8827112592119214e-05, + "loss": 7.5216, + "step": 18500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879541293244676e-05, + "loss": 7.5125, + "step": 19000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876371327277431e-05, + "loss": 7.5166, + "step": 19500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873201361310185e-05, + "loss": 7.5145, + "step": 20000 + }, + { + "epoch": 0.08, + "learning_rate": 4.87003139534294e-05, + "loss": 7.5154, + "step": 20500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8668614293756944e-05, + "loss": 7.4982, + "step": 21000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8636914634084494e-05, + "loss": 7.5113, + "step": 21500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860521497441204e-05, + "loss": 7.5053, + "step": 22000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857351531473958e-05, + "loss": 7.5311, + "step": 22500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854181565506713e-05, + "loss": 7.4977, + "step": 23000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851011599539468e-05, + "loss": 7.5142, + "step": 23500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8478416335722224e-05, + "loss": 7.5061, + "step": 24000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844671667604977e-05, + "loss": 7.519, + "step": 24500 + }, + { + "epoch": 0.1, + "learning_rate": 4.841501701637731e-05, + "loss": 7.5055, + "step": 25000 + }, + { + "epoch": 0.1, + "learning_rate": 4.838331735670486e-05, + "loss": 7.5135, + "step": 25500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835161769703241e-05, + "loss": 7.5041, + "step": 26000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8319918037359954e-05, + "loss": 7.5036, + "step": 26500 + }, + { + "epoch": 0.1, + "learning_rate": 4.82882183776875e-05, + "loss": 7.526, + "step": 27000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825651871801505e-05, + "loss": 7.5069, + "step": 27500 + }, + { + "epoch": 0.11, + "learning_rate": 4.822481905834259e-05, + "loss": 7.4989, + "step": 28000 + }, + { + "epoch": 0.11, + "learning_rate": 4.819311939867014e-05, + "loss": 7.5217, + "step": 28500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8161419738997684e-05, + "loss": 7.4926, + "step": 29000 + }, + { + "epoch": 0.11, + "learning_rate": 4.812972007932523e-05, + "loss": 7.5044, + "step": 29500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809802041965278e-05, + "loss": 7.4951, + "step": 30000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806632075998032e-05, + "loss": 7.5096, + "step": 30500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803462110030787e-05, + "loss": 7.5154, + "step": 31000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800292144063542e-05, + "loss": 7.5022, + "step": 31500 + }, + { + "epoch": 0.12, + "learning_rate": 4.797122178096296e-05, + "loss": 7.5204, + "step": 32000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793952212129051e-05, + "loss": 7.5097, + "step": 32500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790782246161806e-05, + "loss": 7.5172, + "step": 33000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78761228019456e-05, + "loss": 7.4931, + "step": 33500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784442314227315e-05, + "loss": 7.4993, + "step": 34000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781272348260069e-05, + "loss": 7.4984, + "step": 34500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778102382292824e-05, + "loss": 7.5244, + "step": 35000 + }, + { + "epoch": 0.14, + "learning_rate": 4.774932416325579e-05, + "loss": 7.504, + "step": 35500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771762450358333e-05, + "loss": 7.5031, + "step": 36000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768592484391088e-05, + "loss": 7.5034, + "step": 36500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7654225184238425e-05, + "loss": 7.5217, + "step": 37000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762252552456597e-05, + "loss": 7.5025, + "step": 37500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759082586489352e-05, + "loss": 7.5059, + "step": 38000 + }, + { + "epoch": 0.15, + "learning_rate": 4.755912620522106e-05, + "loss": 7.5178, + "step": 38500 + }, + { + "epoch": 0.15, + "learning_rate": 4.752742654554861e-05, + "loss": 7.5004, + "step": 39000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7495726885876155e-05, + "loss": 7.5094, + "step": 39500 + }, + { + "epoch": 0.15, + "learning_rate": 4.74640272262037e-05, + "loss": 7.4932, + "step": 40000 + }, + { + "epoch": 0.15, + "learning_rate": 4.743232756653125e-05, + "loss": 7.5062, + "step": 40500 + }, + { + "epoch": 0.16, + "learning_rate": 4.74006279068588e-05, + "loss": 7.5029, + "step": 41000 + }, + { + "epoch": 0.16, + "learning_rate": 4.736892824718634e-05, + "loss": 7.5028, + "step": 41500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7337228587513885e-05, + "loss": 7.5218, + "step": 42000 + }, + { + "epoch": 0.16, + "learning_rate": 4.730552892784143e-05, + "loss": 7.4965, + "step": 42500 + }, + { + "epoch": 0.16, + "learning_rate": 4.727382926816898e-05, + "loss": 7.5068, + "step": 43000 + }, + { + "epoch": 0.17, + "learning_rate": 4.724212960849653e-05, + "loss": 7.5086, + "step": 43500 + }, + { + "epoch": 0.17, + "learning_rate": 4.721042994882407e-05, + "loss": 7.4934, + "step": 44000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7178730289151615e-05, + "loss": 7.4922, + "step": 44500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7147030629479165e-05, + "loss": 7.4961, + "step": 45000 + }, + { + "epoch": 0.17, + "learning_rate": 4.711533096980671e-05, + "loss": 7.5111, + "step": 45500 + }, + { + "epoch": 0.17, + "learning_rate": 4.708363131013426e-05, + "loss": 7.4964, + "step": 46000 + }, + { + "epoch": 0.18, + "learning_rate": 4.70519316504618e-05, + "loss": 7.5062, + "step": 46500 + }, + { + "epoch": 0.18, + "learning_rate": 4.7020231990789345e-05, + "loss": 7.4992, + "step": 47000 + }, + { + "epoch": 0.18, + "learning_rate": 4.6988532331116895e-05, + "loss": 7.5116, + "step": 47500 + }, + { + "epoch": 0.18, + "learning_rate": 4.695683267144444e-05, + "loss": 7.5012, + "step": 48000 + }, + { + "epoch": 0.18, + "learning_rate": 4.692513301177199e-05, + "loss": 7.4854, + "step": 48500 + }, + { + "epoch": 0.19, + "learning_rate": 4.689343335209954e-05, + "loss": 7.4943, + "step": 49000 + }, + { + "epoch": 0.19, + "learning_rate": 4.686173369242708e-05, + "loss": 7.498, + "step": 49500 + }, + { + "epoch": 0.19, + "learning_rate": 4.6830034032754625e-05, + "loss": 7.4868, + "step": 50000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6798334373082175e-05, + "loss": 7.4862, + "step": 50500 + }, + { + "epoch": 0.19, + "learning_rate": 4.676663471340972e-05, + "loss": 7.4992, + "step": 51000 + }, + { + "epoch": 0.2, + "learning_rate": 4.673493505373727e-05, + "loss": 7.4996, + "step": 51500 + }, + { + "epoch": 0.2, + "learning_rate": 4.670323539406481e-05, + "loss": 7.499, + "step": 52000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6671535734392356e-05, + "loss": 7.503, + "step": 52500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6639836074719906e-05, + "loss": 7.488, + "step": 53000 + }, + { + "epoch": 0.2, + "learning_rate": 4.660813641504745e-05, + "loss": 7.4991, + "step": 53500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6576436755375e-05, + "loss": 7.4968, + "step": 54000 + }, + { + "epoch": 0.21, + "learning_rate": 4.654473709570254e-05, + "loss": 7.5004, + "step": 54500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6513037436030086e-05, + "loss": 7.494, + "step": 55000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6481337776357636e-05, + "loss": 7.4935, + "step": 55500 + }, + { + "epoch": 0.21, + "learning_rate": 4.644963811668518e-05, + "loss": 7.5, + "step": 56000 + }, + { + "epoch": 0.21, + "learning_rate": 4.641793845701273e-05, + "loss": 7.4858, + "step": 56500 + }, + { + "epoch": 0.22, + "learning_rate": 4.638623879734027e-05, + "loss": 7.5052, + "step": 57000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6354539137667816e-05, + "loss": 7.5023, + "step": 57500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6322839477995366e-05, + "loss": 7.4919, + "step": 58000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6291139818322916e-05, + "loss": 7.49, + "step": 58500 + }, + { + "epoch": 0.22, + "learning_rate": 4.625944015865046e-05, + "loss": 7.5039, + "step": 59000 + }, + { + "epoch": 0.23, + "learning_rate": 4.622774049897801e-05, + "loss": 7.4817, + "step": 59500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6196040839305546e-05, + "loss": 7.4938, + "step": 60000 + }, + { + "epoch": 0.23, + "learning_rate": 4.6164341179633096e-05, + "loss": 7.496, + "step": 60500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6132641519960646e-05, + "loss": 7.4985, + "step": 61000 + }, + { + "epoch": 0.23, + "learning_rate": 4.610094186028819e-05, + "loss": 7.4996, + "step": 61500 + }, + { + "epoch": 0.24, + "learning_rate": 4.606924220061574e-05, + "loss": 7.5, + "step": 62000 + }, + { + "epoch": 0.24, + "learning_rate": 4.603754254094328e-05, + "loss": 7.4978, + "step": 62500 + }, + { + "epoch": 0.24, + "learning_rate": 4.6005842881270826e-05, + "loss": 7.4842, + "step": 63000 + }, + { + "epoch": 0.24, + "learning_rate": 4.5974143221598376e-05, + "loss": 7.5213, + "step": 63500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5942443561925926e-05, + "loss": 7.4911, + "step": 64000 + }, + { + "epoch": 0.25, + "learning_rate": 4.591074390225347e-05, + "loss": 7.4901, + "step": 64500 + }, + { + "epoch": 0.25, + "learning_rate": 4.587904424258101e-05, + "loss": 7.5091, + "step": 65000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5847344582908556e-05, + "loss": 7.5016, + "step": 65500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5815644923236106e-05, + "loss": 7.4955, + "step": 66000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5783945263563656e-05, + "loss": 7.484, + "step": 66500 + }, + { + "epoch": 0.25, + "learning_rate": 4.57522456038912e-05, + "loss": 7.4943, + "step": 67000 + }, + { + "epoch": 0.26, + "learning_rate": 4.572054594421874e-05, + "loss": 7.4962, + "step": 67500 + }, + { + "epoch": 0.26, + "learning_rate": 4.568884628454629e-05, + "loss": 7.4952, + "step": 68000 + }, + { + "epoch": 0.26, + "learning_rate": 4.5657146624873837e-05, + "loss": 7.5072, + "step": 68500 + }, + { + "epoch": 0.26, + "learning_rate": 4.5625446965201387e-05, + "loss": 7.5096, + "step": 69000 + }, + { + "epoch": 0.26, + "learning_rate": 4.559374730552893e-05, + "loss": 7.49, + "step": 69500 + }, + { + "epoch": 0.27, + "learning_rate": 4.556204764585647e-05, + "loss": 7.4966, + "step": 70000 + }, + { + "epoch": 0.27, + "learning_rate": 4.553034798618402e-05, + "loss": 7.5115, + "step": 70500 + }, + { + "epoch": 0.27, + "learning_rate": 4.549864832651157e-05, + "loss": 7.503, + "step": 71000 + }, + { + "epoch": 0.27, + "learning_rate": 4.546694866683912e-05, + "loss": 7.4929, + "step": 71500 + }, + { + "epoch": 0.27, + "learning_rate": 4.543524900716667e-05, + "loss": 7.4934, + "step": 72000 + }, + { + "epoch": 0.28, + "learning_rate": 4.54035493474942e-05, + "loss": 7.4957, + "step": 72500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5371849687821753e-05, + "loss": 7.4916, + "step": 73000 + }, + { + "epoch": 0.28, + "learning_rate": 4.53401500281493e-05, + "loss": 7.5026, + "step": 73500 + }, + { + "epoch": 0.28, + "learning_rate": 4.530845036847685e-05, + "loss": 7.5125, + "step": 74000 + }, + { + "epoch": 0.28, + "learning_rate": 4.52767507088044e-05, + "loss": 7.4996, + "step": 74500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5245051049131933e-05, + "loss": 7.4954, + "step": 75000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5213351389459484e-05, + "loss": 7.4997, + "step": 75500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5181651729787034e-05, + "loss": 7.5005, + "step": 76000 + }, + { + "epoch": 0.29, + "learning_rate": 4.514995207011458e-05, + "loss": 7.4982, + "step": 76500 + }, + { + "epoch": 0.29, + "learning_rate": 4.511825241044213e-05, + "loss": 7.4946, + "step": 77000 + }, + { + "epoch": 0.29, + "learning_rate": 4.508655275076967e-05, + "loss": 7.4983, + "step": 77500 + }, + { + "epoch": 0.3, + "learning_rate": 4.5054853091097214e-05, + "loss": 7.5142, + "step": 78000 + }, + { + "epoch": 0.3, + "learning_rate": 4.5023153431424764e-05, + "loss": 7.4921, + "step": 78500 + }, + { + "epoch": 0.3, + "learning_rate": 4.499145377175231e-05, + "loss": 7.4931, + "step": 79000 + }, + { + "epoch": 0.3, + "learning_rate": 4.495975411207986e-05, + "loss": 7.5017, + "step": 79500 + }, + { + "epoch": 0.3, + "learning_rate": 4.49280544524074e-05, + "loss": 7.4859, + "step": 80000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4896354792734944e-05, + "loss": 7.4899, + "step": 80500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4864655133062494e-05, + "loss": 7.4942, + "step": 81000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4832955473390044e-05, + "loss": 7.4778, + "step": 81500 + }, + { + "epoch": 0.31, + "learning_rate": 4.480125581371759e-05, + "loss": 7.5, + "step": 82000 + }, + { + "epoch": 0.31, + "learning_rate": 4.476955615404513e-05, + "loss": 7.5007, + "step": 82500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4737856494372674e-05, + "loss": 7.4928, + "step": 83000 + }, + { + "epoch": 0.32, + "learning_rate": 4.4706156834700224e-05, + "loss": 7.4768, + "step": 83500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4674457175027774e-05, + "loss": 7.4966, + "step": 84000 + }, + { + "epoch": 0.32, + "learning_rate": 4.464275751535532e-05, + "loss": 7.5056, + "step": 84500 + }, + { + "epoch": 0.32, + "learning_rate": 4.461105785568286e-05, + "loss": 7.4967, + "step": 85000 + }, + { + "epoch": 0.33, + "learning_rate": 4.457935819601041e-05, + "loss": 7.493, + "step": 85500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4547658536337954e-05, + "loss": 7.5024, + "step": 86000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4515958876665504e-05, + "loss": 7.4954, + "step": 86500 + }, + { + "epoch": 0.33, + "learning_rate": 4.448425921699305e-05, + "loss": 7.493, + "step": 87000 + }, + { + "epoch": 0.33, + "learning_rate": 4.44525595573206e-05, + "loss": 7.5053, + "step": 87500 + }, + { + "epoch": 0.33, + "learning_rate": 4.442085989764814e-05, + "loss": 7.4864, + "step": 88000 + }, + { + "epoch": 0.34, + "learning_rate": 4.4389160237975684e-05, + "loss": 7.4929, + "step": 88500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4357460578303234e-05, + "loss": 7.4995, + "step": 89000 + }, + { + "epoch": 0.34, + "learning_rate": 4.4325760918630785e-05, + "loss": 7.4882, + "step": 89500 + }, + { + "epoch": 0.34, + "learning_rate": 4.429406125895833e-05, + "loss": 7.5016, + "step": 90000 + }, + { + "epoch": 0.34, + "learning_rate": 4.426236159928587e-05, + "loss": 7.4996, + "step": 90500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4230661939613414e-05, + "loss": 7.486, + "step": 91000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4198962279940965e-05, + "loss": 7.5026, + "step": 91500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4167262620268515e-05, + "loss": 7.4921, + "step": 92000 + }, + { + "epoch": 0.35, + "learning_rate": 4.413556296059606e-05, + "loss": 7.5054, + "step": 92500 + }, + { + "epoch": 0.35, + "learning_rate": 4.41038633009236e-05, + "loss": 7.4965, + "step": 93000 + }, + { + "epoch": 0.36, + "learning_rate": 4.407216364125115e-05, + "loss": 7.4943, + "step": 93500 + }, + { + "epoch": 0.36, + "learning_rate": 4.4040463981578695e-05, + "loss": 7.4944, + "step": 94000 + }, + { + "epoch": 0.36, + "learning_rate": 4.4008764321906245e-05, + "loss": 7.4919, + "step": 94500 + }, + { + "epoch": 0.36, + "learning_rate": 4.397706466223379e-05, + "loss": 7.4955, + "step": 95000 + }, + { + "epoch": 0.36, + "learning_rate": 4.394536500256133e-05, + "loss": 7.5051, + "step": 95500 + }, + { + "epoch": 0.37, + "learning_rate": 4.391366534288888e-05, + "loss": 7.5002, + "step": 96000 + }, + { + "epoch": 0.37, + "learning_rate": 4.3881965683216425e-05, + "loss": 7.5037, + "step": 96500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3850266023543975e-05, + "loss": 7.4848, + "step": 97000 + }, + { + "epoch": 0.37, + "learning_rate": 4.381856636387152e-05, + "loss": 7.5064, + "step": 97500 + }, + { + "epoch": 0.37, + "learning_rate": 4.378686670419906e-05, + "loss": 7.4905, + "step": 98000 + }, + { + "epoch": 0.37, + "learning_rate": 4.375516704452661e-05, + "loss": 7.4929, + "step": 98500 + }, + { + "epoch": 0.38, + "learning_rate": 4.372346738485416e-05, + "loss": 7.5115, + "step": 99000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3691767725181705e-05, + "loss": 7.4942, + "step": 99500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3660068065509255e-05, + "loss": 7.5059, + "step": 100000 + }, + { + "epoch": 0.38, + "learning_rate": 4.362836840583679e-05, + "loss": 7.4867, + "step": 100500 + }, + { + "epoch": 0.38, + "learning_rate": 4.359666874616434e-05, + "loss": 7.4966, + "step": 101000 + }, + { + "epoch": 0.39, + "learning_rate": 4.356496908649189e-05, + "loss": 7.4955, + "step": 101500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3533269426819435e-05, + "loss": 7.4877, + "step": 102000 + }, + { + "epoch": 0.39, + "learning_rate": 4.3501569767146985e-05, + "loss": 7.4893, + "step": 102500 + }, + { + "epoch": 0.39, + "learning_rate": 4.346987010747453e-05, + "loss": 7.4861, + "step": 103000 + }, + { + "epoch": 0.39, + "learning_rate": 4.343817044780207e-05, + "loss": 7.4841, + "step": 103500 + }, + { + "epoch": 0.4, + "learning_rate": 4.340647078812962e-05, + "loss": 7.49, + "step": 104000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3374771128457165e-05, + "loss": 7.4895, + "step": 104500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3343071468784715e-05, + "loss": 7.5027, + "step": 105000 + }, + { + "epoch": 0.4, + "learning_rate": 4.331137180911226e-05, + "loss": 7.493, + "step": 105500 + }, + { + "epoch": 0.4, + "learning_rate": 4.32796721494398e-05, + "loss": 7.4999, + "step": 106000 + }, + { + "epoch": 0.41, + "learning_rate": 4.324797248976735e-05, + "loss": 7.5011, + "step": 106500 + }, + { + "epoch": 0.41, + "learning_rate": 4.32162728300949e-05, + "loss": 7.503, + "step": 107000 + }, + { + "epoch": 0.41, + "learning_rate": 4.3184573170422446e-05, + "loss": 7.4797, + "step": 107500 + }, + { + "epoch": 0.41, + "learning_rate": 4.315287351074999e-05, + "loss": 7.4771, + "step": 108000 + }, + { + "epoch": 0.41, + "learning_rate": 4.312117385107753e-05, + "loss": 7.5038, + "step": 108500 + }, + { + "epoch": 0.41, + "learning_rate": 4.308947419140508e-05, + "loss": 7.4926, + "step": 109000 + }, + { + "epoch": 0.42, + "learning_rate": 4.305777453173263e-05, + "loss": 7.5078, + "step": 109500 + }, + { + "epoch": 0.42, + "learning_rate": 4.3026074872060176e-05, + "loss": 7.4931, + "step": 110000 + }, + { + "epoch": 0.42, + "learning_rate": 4.299437521238772e-05, + "loss": 7.4927, + "step": 110500 + }, + { + "epoch": 0.42, + "learning_rate": 4.296267555271527e-05, + "loss": 7.5058, + "step": 111000 + }, + { + "epoch": 0.42, + "learning_rate": 4.293097589304281e-05, + "loss": 7.4955, + "step": 111500 + }, + { + "epoch": 0.43, + "learning_rate": 4.289927623337036e-05, + "loss": 7.4994, + "step": 112000 + }, + { + "epoch": 0.43, + "learning_rate": 4.286757657369791e-05, + "loss": 7.5213, + "step": 112500 + }, + { + "epoch": 0.43, + "learning_rate": 4.283587691402545e-05, + "loss": 7.4715, + "step": 113000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2804177254353e-05, + "loss": 7.5062, + "step": 113500 + }, + { + "epoch": 0.43, + "learning_rate": 4.277247759468054e-05, + "loss": 7.5004, + "step": 114000 + }, + { + "epoch": 0.44, + "learning_rate": 4.274077793500809e-05, + "loss": 7.4991, + "step": 114500 + }, + { + "epoch": 0.44, + "learning_rate": 4.270907827533564e-05, + "loss": 7.4989, + "step": 115000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2677378615663186e-05, + "loss": 7.4709, + "step": 115500 + }, + { + "epoch": 0.44, + "learning_rate": 4.264567895599073e-05, + "loss": 7.4851, + "step": 116000 + }, + { + "epoch": 0.44, + "learning_rate": 4.261397929631828e-05, + "loss": 7.49, + "step": 116500 + }, + { + "epoch": 0.45, + "learning_rate": 4.258227963664582e-05, + "loss": 7.4925, + "step": 117000 + }, + { + "epoch": 0.45, + "learning_rate": 4.255057997697337e-05, + "loss": 7.4997, + "step": 117500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2518880317300916e-05, + "loss": 7.5011, + "step": 118000 + }, + { + "epoch": 0.45, + "learning_rate": 4.248718065762846e-05, + "loss": 7.495, + "step": 118500 + }, + { + "epoch": 0.45, + "learning_rate": 4.245548099795601e-05, + "loss": 7.4749, + "step": 119000 + }, + { + "epoch": 0.45, + "learning_rate": 4.242378133828355e-05, + "loss": 7.5037, + "step": 119500 + }, + { + "epoch": 0.46, + "learning_rate": 4.23920816786111e-05, + "loss": 7.4854, + "step": 120000 + }, + { + "epoch": 0.46, + "learning_rate": 4.2360382018938646e-05, + "loss": 7.5, + "step": 120500 + }, + { + "epoch": 0.46, + "learning_rate": 4.232868235926619e-05, + "loss": 7.5072, + "step": 121000 + }, + { + "epoch": 0.46, + "learning_rate": 4.229698269959374e-05, + "loss": 7.4904, + "step": 121500 + }, + { + "epoch": 0.46, + "learning_rate": 4.226528303992128e-05, + "loss": 7.4916, + "step": 122000 + }, + { + "epoch": 0.47, + "learning_rate": 4.223358338024883e-05, + "loss": 7.4959, + "step": 122500 + }, + { + "epoch": 0.47, + "learning_rate": 4.2201883720576376e-05, + "loss": 7.5056, + "step": 123000 + }, + { + "epoch": 0.47, + "learning_rate": 4.217018406090392e-05, + "loss": 7.4923, + "step": 123500 + }, + { + "epoch": 0.47, + "learning_rate": 4.213848440123147e-05, + "loss": 7.4946, + "step": 124000 + }, + { + "epoch": 0.47, + "learning_rate": 4.210678474155902e-05, + "loss": 7.4985, + "step": 124500 + }, + { + "epoch": 0.48, + "learning_rate": 4.207508508188656e-05, + "loss": 7.4835, + "step": 125000 + }, + { + "epoch": 0.48, + "learning_rate": 4.2043385422214107e-05, + "loss": 7.4963, + "step": 125500 + }, + { + "epoch": 0.48, + "learning_rate": 4.201168576254165e-05, + "loss": 7.4945, + "step": 126000 + }, + { + "epoch": 0.48, + "learning_rate": 4.19799861028692e-05, + "loss": 7.4981, + "step": 126500 + }, + { + "epoch": 0.48, + "learning_rate": 4.194828644319675e-05, + "loss": 7.4966, + "step": 127000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1916586783524293e-05, + "loss": 7.4856, + "step": 127500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1884887123851843e-05, + "loss": 7.4974, + "step": 128000 + }, + { + "epoch": 0.49, + "learning_rate": 4.185318746417939e-05, + "loss": 7.4956, + "step": 128500 + }, + { + "epoch": 0.49, + "learning_rate": 4.182148780450693e-05, + "loss": 7.4918, + "step": 129000 + }, + { + "epoch": 0.49, + "learning_rate": 4.178978814483448e-05, + "loss": 7.5015, + "step": 129500 + }, + { + "epoch": 0.49, + "learning_rate": 4.175808848516203e-05, + "loss": 7.4968, + "step": 130000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1726388825489574e-05, + "loss": 7.497, + "step": 130500 + }, + { + "epoch": 0.5, + "learning_rate": 4.169468916581712e-05, + "loss": 7.4941, + "step": 131000 + }, + { + "epoch": 0.5, + "learning_rate": 4.166298950614466e-05, + "loss": 7.5052, + "step": 131500 + }, + { + "epoch": 0.5, + "learning_rate": 4.163128984647221e-05, + "loss": 7.4969, + "step": 132000 + }, + { + "epoch": 0.5, + "learning_rate": 4.159959018679976e-05, + "loss": 7.4822, + "step": 132500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1567890527127304e-05, + "loss": 7.4896, + "step": 133000 + }, + { + "epoch": 0.51, + "learning_rate": 4.153619086745485e-05, + "loss": 7.5044, + "step": 133500 + }, + { + "epoch": 0.51, + "learning_rate": 4.15044912077824e-05, + "loss": 7.4871, + "step": 134000 + }, + { + "epoch": 0.51, + "learning_rate": 4.147279154810994e-05, + "loss": 7.4949, + "step": 134500 + }, + { + "epoch": 0.51, + "learning_rate": 4.144109188843749e-05, + "loss": 7.4831, + "step": 135000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1409392228765034e-05, + "loss": 7.4978, + "step": 135500 + }, + { + "epoch": 0.52, + "learning_rate": 4.137769256909258e-05, + "loss": 7.5074, + "step": 136000 + }, + { + "epoch": 0.52, + "learning_rate": 4.134599290942013e-05, + "loss": 7.4895, + "step": 136500 + }, + { + "epoch": 0.52, + "learning_rate": 4.131429324974767e-05, + "loss": 7.4818, + "step": 137000 + }, + { + "epoch": 0.52, + "learning_rate": 4.128259359007522e-05, + "loss": 7.5038, + "step": 137500 + }, + { + "epoch": 0.52, + "learning_rate": 4.125089393040277e-05, + "loss": 7.5023, + "step": 138000 + }, + { + "epoch": 0.53, + "learning_rate": 4.121919427073031e-05, + "loss": 7.4888, + "step": 138500 + }, + { + "epoch": 0.53, + "learning_rate": 4.118749461105786e-05, + "loss": 7.4901, + "step": 139000 + }, + { + "epoch": 0.53, + "learning_rate": 4.11557949513854e-05, + "loss": 7.4867, + "step": 139500 + }, + { + "epoch": 0.53, + "learning_rate": 4.112409529171295e-05, + "loss": 7.5052, + "step": 140000 + }, + { + "epoch": 0.53, + "learning_rate": 4.10923956320405e-05, + "loss": 7.4933, + "step": 140500 + }, + { + "epoch": 0.54, + "learning_rate": 4.106069597236804e-05, + "loss": 7.4912, + "step": 141000 + }, + { + "epoch": 0.54, + "learning_rate": 4.102899631269559e-05, + "loss": 7.497, + "step": 141500 + }, + { + "epoch": 0.54, + "learning_rate": 4.099729665302314e-05, + "loss": 7.4989, + "step": 142000 + }, + { + "epoch": 0.54, + "learning_rate": 4.096559699335068e-05, + "loss": 7.486, + "step": 142500 + }, + { + "epoch": 0.54, + "learning_rate": 4.093389733367823e-05, + "loss": 7.4852, + "step": 143000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0902197674005774e-05, + "loss": 7.4835, + "step": 143500 + }, + { + "epoch": 0.55, + "learning_rate": 4.087049801433332e-05, + "loss": 7.4888, + "step": 144000 + }, + { + "epoch": 0.55, + "learning_rate": 4.083879835466087e-05, + "loss": 7.492, + "step": 144500 + }, + { + "epoch": 0.55, + "learning_rate": 4.080709869498841e-05, + "loss": 7.4884, + "step": 145000 + }, + { + "epoch": 0.55, + "learning_rate": 4.077539903531596e-05, + "loss": 7.4859, + "step": 145500 + }, + { + "epoch": 0.56, + "learning_rate": 4.0743699375643505e-05, + "loss": 7.4929, + "step": 146000 + }, + { + "epoch": 0.56, + "learning_rate": 4.071199971597105e-05, + "loss": 7.4726, + "step": 146500 + }, + { + "epoch": 0.56, + "learning_rate": 4.06803000562986e-05, + "loss": 7.4954, + "step": 147000 + }, + { + "epoch": 0.56, + "learning_rate": 4.064860039662615e-05, + "loss": 7.4859, + "step": 147500 + }, + { + "epoch": 0.56, + "learning_rate": 4.061690073695369e-05, + "loss": 7.4997, + "step": 148000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0585201077281235e-05, + "loss": 7.4929, + "step": 148500 + }, + { + "epoch": 0.57, + "learning_rate": 4.055350141760878e-05, + "loss": 7.5029, + "step": 149000 + }, + { + "epoch": 0.57, + "learning_rate": 4.052180175793633e-05, + "loss": 7.4995, + "step": 149500 + }, + { + "epoch": 0.57, + "learning_rate": 4.049010209826388e-05, + "loss": 7.4838, + "step": 150000 + }, + { + "epoch": 0.57, + "learning_rate": 4.045840243859142e-05, + "loss": 7.4977, + "step": 150500 + }, + { + "epoch": 0.57, + "learning_rate": 4.0426702778918965e-05, + "loss": 7.4772, + "step": 151000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0395003119246515e-05, + "loss": 7.4779, + "step": 151500 + }, + { + "epoch": 0.58, + "learning_rate": 4.036330345957406e-05, + "loss": 7.4878, + "step": 152000 + }, + { + "epoch": 0.58, + "learning_rate": 4.033160379990161e-05, + "loss": 7.4886, + "step": 152500 + }, + { + "epoch": 0.58, + "learning_rate": 4.029990414022915e-05, + "loss": 7.4924, + "step": 153000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0268204480556695e-05, + "loss": 7.4925, + "step": 153500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0236504820884245e-05, + "loss": 7.4942, + "step": 154000 + }, + { + "epoch": 0.59, + "learning_rate": 4.020480516121179e-05, + "loss": 7.4917, + "step": 154500 + }, + { + "epoch": 0.59, + "learning_rate": 4.017310550153934e-05, + "loss": 7.4959, + "step": 155000 + }, + { + "epoch": 0.59, + "learning_rate": 4.014140584186689e-05, + "loss": 7.5003, + "step": 155500 + }, + { + "epoch": 0.59, + "learning_rate": 4.010970618219443e-05, + "loss": 7.5046, + "step": 156000 + }, + { + "epoch": 0.6, + "learning_rate": 4.0078006522521975e-05, + "loss": 7.4904, + "step": 156500 + }, + { + "epoch": 0.6, + "learning_rate": 4.004630686284952e-05, + "loss": 7.4885, + "step": 157000 + }, + { + "epoch": 0.6, + "learning_rate": 4.001460720317707e-05, + "loss": 7.4943, + "step": 157500 + }, + { + "epoch": 0.6, + "learning_rate": 3.998290754350462e-05, + "loss": 7.4867, + "step": 158000 + }, + { + "epoch": 0.6, + "learning_rate": 3.995120788383216e-05, + "loss": 7.4789, + "step": 158500 + }, + { + "epoch": 0.6, + "learning_rate": 3.9919508224159705e-05, + "loss": 7.491, + "step": 159000 + }, + { + "epoch": 0.61, + "learning_rate": 3.9887808564487255e-05, + "loss": 7.4918, + "step": 159500 + }, + { + "epoch": 0.61, + "learning_rate": 3.98561089048148e-05, + "loss": 7.4913, + "step": 160000 + }, + { + "epoch": 0.61, + "learning_rate": 3.982440924514235e-05, + "loss": 7.4989, + "step": 160500 + }, + { + "epoch": 0.61, + "learning_rate": 3.979270958546989e-05, + "loss": 7.4798, + "step": 161000 + }, + { + "epoch": 0.61, + "learning_rate": 3.9761009925797435e-05, + "loss": 7.4842, + "step": 161500 + }, + { + "epoch": 0.62, + "learning_rate": 3.9729310266124985e-05, + "loss": 7.5008, + "step": 162000 + }, + { + "epoch": 0.62, + "learning_rate": 3.969761060645253e-05, + "loss": 7.4923, + "step": 162500 + }, + { + "epoch": 0.62, + "learning_rate": 3.966591094678008e-05, + "loss": 7.4895, + "step": 163000 + }, + { + "epoch": 0.62, + "learning_rate": 3.963421128710762e-05, + "loss": 7.4881, + "step": 163500 + }, + { + "epoch": 0.62, + "learning_rate": 3.9602511627435166e-05, + "loss": 7.4888, + "step": 164000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9570811967762716e-05, + "loss": 7.4991, + "step": 164500 + }, + { + "epoch": 0.63, + "learning_rate": 3.9539112308090266e-05, + "loss": 7.4991, + "step": 165000 + }, + { + "epoch": 0.63, + "learning_rate": 3.950741264841781e-05, + "loss": 7.4837, + "step": 165500 + }, + { + "epoch": 0.63, + "learning_rate": 3.947571298874536e-05, + "loss": 7.4836, + "step": 166000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9444013329072896e-05, + "loss": 7.5083, + "step": 166500 + }, + { + "epoch": 0.64, + "learning_rate": 3.9412313669400446e-05, + "loss": 7.4966, + "step": 167000 + }, + { + "epoch": 0.64, + "learning_rate": 3.9380614009727996e-05, + "loss": 7.494, + "step": 167500 + }, + { + "epoch": 0.64, + "learning_rate": 3.934891435005554e-05, + "loss": 7.4945, + "step": 168000 + }, + { + "epoch": 0.64, + "learning_rate": 3.931721469038309e-05, + "loss": 7.4825, + "step": 168500 + }, + { + "epoch": 0.64, + "learning_rate": 3.928551503071063e-05, + "loss": 7.5042, + "step": 169000 + }, + { + "epoch": 0.64, + "learning_rate": 3.9253815371038176e-05, + "loss": 7.4825, + "step": 169500 + }, + { + "epoch": 0.65, + "learning_rate": 3.9222115711365726e-05, + "loss": 7.501, + "step": 170000 + }, + { + "epoch": 0.65, + "learning_rate": 3.919041605169327e-05, + "loss": 7.5055, + "step": 170500 + }, + { + "epoch": 0.65, + "learning_rate": 3.915871639202082e-05, + "loss": 7.4882, + "step": 171000 + }, + { + "epoch": 0.65, + "learning_rate": 3.912701673234836e-05, + "loss": 7.4768, + "step": 171500 + }, + { + "epoch": 0.65, + "learning_rate": 3.9095317072675906e-05, + "loss": 7.4929, + "step": 172000 + }, + { + "epoch": 0.66, + "learning_rate": 3.9063617413003456e-05, + "loss": 7.4906, + "step": 172500 + }, + { + "epoch": 0.66, + "learning_rate": 3.9031917753331006e-05, + "loss": 7.4936, + "step": 173000 + }, + { + "epoch": 0.66, + "learning_rate": 3.900021809365855e-05, + "loss": 7.5112, + "step": 173500 + }, + { + "epoch": 0.66, + "learning_rate": 3.896851843398609e-05, + "loss": 7.4947, + "step": 174000 + }, + { + "epoch": 0.66, + "learning_rate": 3.8936818774313636e-05, + "loss": 7.4788, + "step": 174500 + }, + { + "epoch": 0.67, + "learning_rate": 3.8905119114641186e-05, + "loss": 7.4922, + "step": 175000 + }, + { + "epoch": 0.67, + "learning_rate": 3.8873419454968736e-05, + "loss": 7.5001, + "step": 175500 + }, + { + "epoch": 0.67, + "learning_rate": 3.884171979529628e-05, + "loss": 7.4882, + "step": 176000 + }, + { + "epoch": 0.67, + "learning_rate": 3.881002013562382e-05, + "loss": 7.5024, + "step": 176500 + }, + { + "epoch": 0.67, + "learning_rate": 3.877832047595137e-05, + "loss": 7.4859, + "step": 177000 + }, + { + "epoch": 0.68, + "learning_rate": 3.8746620816278916e-05, + "loss": 7.4837, + "step": 177500 + }, + { + "epoch": 0.68, + "learning_rate": 3.8714921156606466e-05, + "loss": 7.5076, + "step": 178000 + }, + { + "epoch": 0.68, + "learning_rate": 3.8683221496934017e-05, + "loss": 7.4998, + "step": 178500 + }, + { + "epoch": 0.68, + "learning_rate": 3.865152183726155e-05, + "loss": 7.491, + "step": 179000 + }, + { + "epoch": 0.68, + "learning_rate": 3.86198221775891e-05, + "loss": 7.4899, + "step": 179500 + }, + { + "epoch": 0.68, + "learning_rate": 3.8588122517916647e-05, + "loss": 7.488, + "step": 180000 + }, + { + "epoch": 0.69, + "learning_rate": 3.8556422858244197e-05, + "loss": 7.4895, + "step": 180500 + }, + { + "epoch": 0.69, + "learning_rate": 3.852472319857175e-05, + "loss": 7.4976, + "step": 181000 + }, + { + "epoch": 0.69, + "learning_rate": 3.849302353889928e-05, + "loss": 7.505, + "step": 181500 + }, + { + "epoch": 0.69, + "learning_rate": 3.846132387922683e-05, + "loss": 7.4959, + "step": 182000 + }, + { + "epoch": 0.69, + "learning_rate": 3.8429624219554383e-05, + "loss": 7.4872, + "step": 182500 + }, + { + "epoch": 0.7, + "learning_rate": 3.839792455988193e-05, + "loss": 7.5021, + "step": 183000 + }, + { + "epoch": 0.7, + "learning_rate": 3.836622490020948e-05, + "loss": 7.4847, + "step": 183500 + }, + { + "epoch": 0.7, + "learning_rate": 3.833452524053702e-05, + "loss": 7.4968, + "step": 184000 + }, + { + "epoch": 0.7, + "learning_rate": 3.8302825580864563e-05, + "loss": 7.4772, + "step": 184500 + }, + { + "epoch": 0.7, + "learning_rate": 3.8271125921192114e-05, + "loss": 7.479, + "step": 185000 + }, + { + "epoch": 0.71, + "learning_rate": 3.823942626151966e-05, + "loss": 7.4952, + "step": 185500 + }, + { + "epoch": 0.71, + "learning_rate": 3.820772660184721e-05, + "loss": 7.4871, + "step": 186000 + }, + { + "epoch": 0.71, + "learning_rate": 3.817602694217475e-05, + "loss": 7.4915, + "step": 186500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8144327282502294e-05, + "loss": 7.486, + "step": 187000 + }, + { + "epoch": 0.71, + "learning_rate": 3.8112627622829844e-05, + "loss": 7.4672, + "step": 187500 + }, + { + "epoch": 0.72, + "learning_rate": 3.808092796315739e-05, + "loss": 7.4786, + "step": 188000 + }, + { + "epoch": 0.72, + "learning_rate": 3.804922830348494e-05, + "loss": 7.4983, + "step": 188500 + }, + { + "epoch": 0.72, + "learning_rate": 3.801752864381248e-05, + "loss": 7.5075, + "step": 189000 + }, + { + "epoch": 0.72, + "learning_rate": 3.7985828984140024e-05, + "loss": 7.4893, + "step": 189500 + }, + { + "epoch": 0.72, + "learning_rate": 3.7954129324467574e-05, + "loss": 7.5036, + "step": 190000 + }, + { + "epoch": 0.72, + "learning_rate": 3.7922429664795124e-05, + "loss": 7.4761, + "step": 190500 + }, + { + "epoch": 0.73, + "learning_rate": 3.789073000512267e-05, + "loss": 7.4805, + "step": 191000 + }, + { + "epoch": 0.73, + "learning_rate": 3.785903034545021e-05, + "loss": 7.4845, + "step": 191500 + }, + { + "epoch": 0.73, + "learning_rate": 3.782733068577776e-05, + "loss": 7.4894, + "step": 192000 + }, + { + "epoch": 0.73, + "learning_rate": 3.7795631026105304e-05, + "loss": 7.4943, + "step": 192500 + }, + { + "epoch": 0.73, + "learning_rate": 3.7763931366432854e-05, + "loss": 7.4942, + "step": 193000 + }, + { + "epoch": 0.74, + "learning_rate": 3.77322317067604e-05, + "loss": 7.4964, + "step": 193500 + }, + { + "epoch": 0.74, + "learning_rate": 3.770053204708795e-05, + "loss": 7.4966, + "step": 194000 + }, + { + "epoch": 0.74, + "learning_rate": 3.766883238741549e-05, + "loss": 7.5028, + "step": 194500 + }, + { + "epoch": 0.74, + "learning_rate": 3.7637132727743034e-05, + "loss": 7.4922, + "step": 195000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7605433068070584e-05, + "loss": 7.5029, + "step": 195500 + }, + { + "epoch": 0.75, + "learning_rate": 3.7573733408398134e-05, + "loss": 7.48, + "step": 196000 + }, + { + "epoch": 0.75, + "learning_rate": 3.754203374872568e-05, + "loss": 7.4906, + "step": 196500 + }, + { + "epoch": 0.75, + "learning_rate": 3.751033408905322e-05, + "loss": 7.4937, + "step": 197000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7478634429380764e-05, + "loss": 7.4733, + "step": 197500 + }, + { + "epoch": 0.75, + "learning_rate": 3.7446934769708314e-05, + "loss": 7.5046, + "step": 198000 + }, + { + "epoch": 0.76, + "learning_rate": 3.7415235110035864e-05, + "loss": 7.4886, + "step": 198500 + }, + { + "epoch": 0.76, + "learning_rate": 3.738353545036341e-05, + "loss": 7.4883, + "step": 199000 + }, + { + "epoch": 0.76, + "learning_rate": 3.735183579069095e-05, + "loss": 7.4905, + "step": 199500 + }, + { + "epoch": 0.76, + "learning_rate": 3.73201361310185e-05, + "loss": 7.4878, + "step": 200000 + }, + { + "epoch": 0.76, + "learning_rate": 3.7288436471346044e-05, + "loss": 7.4861, + "step": 200500 + }, + { + "epoch": 0.76, + "learning_rate": 3.7256736811673595e-05, + "loss": 7.4833, + "step": 201000 + }, + { + "epoch": 0.77, + "learning_rate": 3.722503715200114e-05, + "loss": 7.4814, + "step": 201500 + }, + { + "epoch": 0.77, + "learning_rate": 3.719333749232868e-05, + "loss": 7.4885, + "step": 202000 + }, + { + "epoch": 0.77, + "learning_rate": 3.716163783265623e-05, + "loss": 7.4981, + "step": 202500 + }, + { + "epoch": 0.77, + "learning_rate": 3.7129938172983775e-05, + "loss": 7.5081, + "step": 203000 + }, + { + "epoch": 0.77, + "learning_rate": 3.7098238513311325e-05, + "loss": 7.5021, + "step": 203500 + }, + { + "epoch": 0.78, + "learning_rate": 3.706653885363887e-05, + "loss": 7.4927, + "step": 204000 + }, + { + "epoch": 0.78, + "learning_rate": 3.703483919396641e-05, + "loss": 7.4876, + "step": 204500 + }, + { + "epoch": 0.78, + "learning_rate": 3.700313953429396e-05, + "loss": 7.4899, + "step": 205000 + }, + { + "epoch": 0.78, + "learning_rate": 3.6971439874621505e-05, + "loss": 7.4842, + "step": 205500 + }, + { + "epoch": 0.78, + "learning_rate": 3.6939740214949055e-05, + "loss": 7.4956, + "step": 206000 + }, + { + "epoch": 0.79, + "learning_rate": 3.6908040555276605e-05, + "loss": 7.4852, + "step": 206500 + }, + { + "epoch": 0.79, + "learning_rate": 3.687634089560414e-05, + "loss": 7.5173, + "step": 207000 + }, + { + "epoch": 0.79, + "learning_rate": 3.684464123593169e-05, + "loss": 7.4874, + "step": 207500 + }, + { + "epoch": 0.79, + "learning_rate": 3.681294157625924e-05, + "loss": 7.4758, + "step": 208000 + }, + { + "epoch": 0.79, + "learning_rate": 3.6781241916586785e-05, + "loss": 7.4979, + "step": 208500 + }, + { + "epoch": 0.8, + "learning_rate": 3.6749542256914335e-05, + "loss": 7.4813, + "step": 209000 + }, + { + "epoch": 0.8, + "learning_rate": 3.671784259724188e-05, + "loss": 7.4912, + "step": 209500 + }, + { + "epoch": 0.8, + "learning_rate": 3.668614293756942e-05, + "loss": 7.496, + "step": 210000 + }, + { + "epoch": 0.8, + "learning_rate": 3.665444327789697e-05, + "loss": 7.5007, + "step": 210500 + }, + { + "epoch": 0.8, + "learning_rate": 3.6622743618224515e-05, + "loss": 7.4935, + "step": 211000 + }, + { + "epoch": 0.8, + "learning_rate": 3.6591043958552065e-05, + "loss": 7.4968, + "step": 211500 + }, + { + "epoch": 0.81, + "learning_rate": 3.655934429887961e-05, + "loss": 7.4897, + "step": 212000 + }, + { + "epoch": 0.81, + "learning_rate": 3.652764463920715e-05, + "loss": 7.4955, + "step": 212500 + }, + { + "epoch": 0.81, + "learning_rate": 3.64959449795347e-05, + "loss": 7.5055, + "step": 213000 + }, + { + "epoch": 0.81, + "learning_rate": 3.646424531986225e-05, + "loss": 7.4854, + "step": 213500 + }, + { + "epoch": 0.81, + "learning_rate": 3.6432545660189795e-05, + "loss": 7.5171, + "step": 214000 + }, + { + "epoch": 0.82, + "learning_rate": 3.640084600051734e-05, + "loss": 7.4996, + "step": 214500 + }, + { + "epoch": 0.82, + "learning_rate": 3.636914634084488e-05, + "loss": 7.4906, + "step": 215000 + }, + { + "epoch": 0.82, + "learning_rate": 3.633744668117243e-05, + "loss": 7.4752, + "step": 215500 + }, + { + "epoch": 0.82, + "learning_rate": 3.630574702149998e-05, + "loss": 7.4973, + "step": 216000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6274047361827525e-05, + "loss": 7.4903, + "step": 216500 + }, + { + "epoch": 0.83, + "learning_rate": 3.624234770215507e-05, + "loss": 7.4801, + "step": 217000 + }, + { + "epoch": 0.83, + "learning_rate": 3.621064804248262e-05, + "loss": 7.4868, + "step": 217500 + }, + { + "epoch": 0.83, + "learning_rate": 3.617894838281016e-05, + "loss": 7.4925, + "step": 218000 + }, + { + "epoch": 0.83, + "learning_rate": 3.614724872313771e-05, + "loss": 7.4862, + "step": 218500 + }, + { + "epoch": 0.83, + "learning_rate": 3.6115549063465256e-05, + "loss": 7.4982, + "step": 219000 + }, + { + "epoch": 0.83, + "learning_rate": 3.60838494037928e-05, + "loss": 7.4778, + "step": 219500 + }, + { + "epoch": 0.84, + "learning_rate": 3.605214974412035e-05, + "loss": 7.5073, + "step": 220000 + }, + { + "epoch": 0.84, + "learning_rate": 3.602045008444789e-05, + "loss": 7.4798, + "step": 220500 + }, + { + "epoch": 0.84, + "learning_rate": 3.598875042477544e-05, + "loss": 7.4791, + "step": 221000 + }, + { + "epoch": 0.84, + "learning_rate": 3.595705076510299e-05, + "loss": 7.4905, + "step": 221500 + }, + { + "epoch": 0.84, + "learning_rate": 3.5925351105430536e-05, + "loss": 7.4877, + "step": 222000 + }, + { + "epoch": 0.85, + "learning_rate": 3.589365144575808e-05, + "loss": 7.4945, + "step": 222500 + }, + { + "epoch": 0.85, + "learning_rate": 3.586195178608562e-05, + "loss": 7.4819, + "step": 223000 + }, + { + "epoch": 0.85, + "learning_rate": 3.583025212641317e-05, + "loss": 7.4984, + "step": 223500 + }, + { + "epoch": 0.85, + "learning_rate": 3.579855246674072e-05, + "loss": 7.5007, + "step": 224000 + }, + { + "epoch": 0.85, + "learning_rate": 3.5766852807068266e-05, + "loss": 7.4948, + "step": 224500 + }, + { + "epoch": 0.86, + "learning_rate": 3.573515314739581e-05, + "loss": 7.5001, + "step": 225000 + }, + { + "epoch": 0.86, + "learning_rate": 3.570345348772336e-05, + "loss": 7.4981, + "step": 225500 + }, + { + "epoch": 0.86, + "learning_rate": 3.56717538280509e-05, + "loss": 7.4831, + "step": 226000 + }, + { + "epoch": 0.86, + "learning_rate": 3.564005416837845e-05, + "loss": 7.4941, + "step": 226500 + }, + { + "epoch": 0.86, + "learning_rate": 3.5608354508705996e-05, + "loss": 7.4782, + "step": 227000 + }, + { + "epoch": 0.87, + "learning_rate": 3.557665484903354e-05, + "loss": 7.4813, + "step": 227500 + }, + { + "epoch": 0.87, + "learning_rate": 3.554495518936109e-05, + "loss": 7.4642, + "step": 228000 + }, + { + "epoch": 0.87, + "learning_rate": 3.551325552968863e-05, + "loss": 7.4816, + "step": 228500 + }, + { + "epoch": 0.87, + "learning_rate": 3.548155587001618e-05, + "loss": 7.509, + "step": 229000 + }, + { + "epoch": 0.87, + "learning_rate": 3.5449856210343726e-05, + "loss": 7.4822, + "step": 229500 + }, + { + "epoch": 0.87, + "learning_rate": 3.541815655067127e-05, + "loss": 7.4954, + "step": 230000 + }, + { + "epoch": 0.88, + "learning_rate": 3.538645689099882e-05, + "loss": 7.4901, + "step": 230500 + }, + { + "epoch": 0.88, + "learning_rate": 3.535475723132637e-05, + "loss": 7.496, + "step": 231000 + }, + { + "epoch": 0.88, + "learning_rate": 3.532305757165391e-05, + "loss": 7.4993, + "step": 231500 + }, + { + "epoch": 0.88, + "learning_rate": 3.5291357911981456e-05, + "loss": 7.4869, + "step": 232000 + }, + { + "epoch": 0.88, + "learning_rate": 3.5259658252309e-05, + "loss": 7.4902, + "step": 232500 + }, + { + "epoch": 0.89, + "learning_rate": 3.522795859263655e-05, + "loss": 7.5146, + "step": 233000 + }, + { + "epoch": 0.89, + "learning_rate": 3.51962589329641e-05, + "loss": 7.4939, + "step": 233500 + }, + { + "epoch": 0.89, + "learning_rate": 3.516455927329164e-05, + "loss": 7.4795, + "step": 234000 + }, + { + "epoch": 0.89, + "learning_rate": 3.513285961361919e-05, + "loss": 7.4806, + "step": 234500 + }, + { + "epoch": 0.89, + "learning_rate": 3.5101159953946737e-05, + "loss": 7.4974, + "step": 235000 + }, + { + "epoch": 0.9, + "learning_rate": 3.506946029427428e-05, + "loss": 7.4941, + "step": 235500 + }, + { + "epoch": 0.9, + "learning_rate": 3.503776063460183e-05, + "loss": 7.4865, + "step": 236000 + }, + { + "epoch": 0.9, + "learning_rate": 3.500606097492937e-05, + "loss": 7.4884, + "step": 236500 + }, + { + "epoch": 0.9, + "learning_rate": 3.497436131525692e-05, + "loss": 7.4856, + "step": 237000 + }, + { + "epoch": 0.9, + "learning_rate": 3.494266165558447e-05, + "loss": 7.4888, + "step": 237500 + }, + { + "epoch": 0.91, + "learning_rate": 3.491096199591201e-05, + "loss": 7.4799, + "step": 238000 + }, + { + "epoch": 0.91, + "learning_rate": 3.487926233623956e-05, + "loss": 7.503, + "step": 238500 + }, + { + "epoch": 0.91, + "learning_rate": 3.484756267656711e-05, + "loss": 7.4893, + "step": 239000 + }, + { + "epoch": 0.91, + "learning_rate": 3.4815863016894653e-05, + "loss": 7.4764, + "step": 239500 + }, + { + "epoch": 0.91, + "learning_rate": 3.47841633572222e-05, + "loss": 7.4831, + "step": 240000 + }, + { + "epoch": 0.91, + "learning_rate": 3.475246369754975e-05, + "loss": 7.4935, + "step": 240500 + }, + { + "epoch": 0.92, + "learning_rate": 3.472076403787729e-05, + "loss": 7.4765, + "step": 241000 + }, + { + "epoch": 0.92, + "learning_rate": 3.468906437820484e-05, + "loss": 7.4874, + "step": 241500 + }, + { + "epoch": 0.92, + "learning_rate": 3.4657364718532384e-05, + "loss": 7.4747, + "step": 242000 + }, + { + "epoch": 0.92, + "learning_rate": 3.462566505885993e-05, + "loss": 7.4763, + "step": 242500 + }, + { + "epoch": 0.92, + "learning_rate": 3.459396539918748e-05, + "loss": 7.4834, + "step": 243000 + }, + { + "epoch": 0.93, + "learning_rate": 3.456226573951502e-05, + "loss": 7.4977, + "step": 243500 + }, + { + "epoch": 0.93, + "learning_rate": 3.453056607984257e-05, + "loss": 7.4831, + "step": 244000 + }, + { + "epoch": 0.93, + "learning_rate": 3.449886642017012e-05, + "loss": 7.4984, + "step": 244500 + }, + { + "epoch": 0.93, + "learning_rate": 3.446716676049766e-05, + "loss": 7.4969, + "step": 245000 + }, + { + "epoch": 0.93, + "learning_rate": 3.443546710082521e-05, + "loss": 7.4881, + "step": 245500 + }, + { + "epoch": 0.94, + "learning_rate": 3.440376744115275e-05, + "loss": 7.4912, + "step": 246000 + }, + { + "epoch": 0.94, + "learning_rate": 3.43720677814803e-05, + "loss": 7.4867, + "step": 246500 + }, + { + "epoch": 0.94, + "learning_rate": 3.434036812180785e-05, + "loss": 7.4966, + "step": 247000 + }, + { + "epoch": 0.94, + "learning_rate": 3.430866846213539e-05, + "loss": 7.498, + "step": 247500 + }, + { + "epoch": 0.94, + "learning_rate": 3.427696880246294e-05, + "loss": 7.4854, + "step": 248000 + }, + { + "epoch": 0.95, + "learning_rate": 3.424526914279049e-05, + "loss": 7.5006, + "step": 248500 + }, + { + "epoch": 0.95, + "learning_rate": 3.421356948311803e-05, + "loss": 7.4864, + "step": 249000 + }, + { + "epoch": 0.95, + "learning_rate": 3.418186982344558e-05, + "loss": 7.4976, + "step": 249500 + }, + { + "epoch": 0.95, + "learning_rate": 3.4150170163773124e-05, + "loss": 7.5078, + "step": 250000 + }, + { + "epoch": 0.95, + "learning_rate": 3.411847050410067e-05, + "loss": 7.4829, + "step": 250500 + }, + { + "epoch": 0.95, + "learning_rate": 3.408677084442822e-05, + "loss": 7.477, + "step": 251000 + }, + { + "epoch": 0.96, + "learning_rate": 3.405507118475576e-05, + "loss": 7.4765, + "step": 251500 + }, + { + "epoch": 0.96, + "learning_rate": 3.402337152508331e-05, + "loss": 7.4881, + "step": 252000 + }, + { + "epoch": 0.96, + "learning_rate": 3.3991671865410854e-05, + "loss": 7.481, + "step": 252500 + }, + { + "epoch": 0.96, + "learning_rate": 3.39599722057384e-05, + "loss": 7.4841, + "step": 253000 + }, + { + "epoch": 0.96, + "learning_rate": 3.392827254606595e-05, + "loss": 7.4838, + "step": 253500 + }, + { + "epoch": 0.97, + "learning_rate": 3.389657288639349e-05, + "loss": 7.4762, + "step": 254000 + }, + { + "epoch": 0.97, + "learning_rate": 3.386487322672104e-05, + "loss": 7.4998, + "step": 254500 + }, + { + "epoch": 0.97, + "learning_rate": 3.3833173567048584e-05, + "loss": 7.4876, + "step": 255000 + }, + { + "epoch": 0.97, + "learning_rate": 3.380147390737613e-05, + "loss": 7.4914, + "step": 255500 + }, + { + "epoch": 0.97, + "learning_rate": 3.376977424770368e-05, + "loss": 7.505, + "step": 256000 + }, + { + "epoch": 0.98, + "learning_rate": 3.373807458803123e-05, + "loss": 7.493, + "step": 256500 + }, + { + "epoch": 0.98, + "learning_rate": 3.370637492835877e-05, + "loss": 7.4727, + "step": 257000 + }, + { + "epoch": 0.98, + "learning_rate": 3.3674675268686315e-05, + "loss": 7.4961, + "step": 257500 + }, + { + "epoch": 0.98, + "learning_rate": 3.3642975609013865e-05, + "loss": 7.4797, + "step": 258000 + }, + { + "epoch": 0.98, + "learning_rate": 3.361127594934141e-05, + "loss": 7.4963, + "step": 258500 + }, + { + "epoch": 0.99, + "learning_rate": 3.357957628966896e-05, + "loss": 7.4847, + "step": 259000 + }, + { + "epoch": 0.99, + "learning_rate": 3.35478766299965e-05, + "loss": 7.4999, + "step": 259500 + }, + { + "epoch": 0.99, + "learning_rate": 3.3516176970324045e-05, + "loss": 7.4945, + "step": 260000 + }, + { + "epoch": 0.99, + "learning_rate": 3.3484477310651595e-05, + "loss": 7.4948, + "step": 260500 + }, + { + "epoch": 0.99, + "learning_rate": 3.345277765097914e-05, + "loss": 7.482, + "step": 261000 + }, + { + "epoch": 0.99, + "learning_rate": 3.342107799130669e-05, + "loss": 7.49, + "step": 261500 + }, + { + "epoch": 1.0, + "learning_rate": 3.338937833163424e-05, + "loss": 7.4875, + "step": 262000 + }, + { + "epoch": 1.0, + "learning_rate": 3.335767867196178e-05, + "loss": 7.4904, + "step": 262500 + }, + { + "epoch": 1.0, + "learning_rate": 3.3325979012289325e-05, + "loss": 7.489, + "step": 263000 + }, + { + "epoch": 1.0, + "learning_rate": 3.329427935261687e-05, + "loss": 7.4787, + "step": 263500 + }, + { + "epoch": 1.0, + "learning_rate": 3.326257969294442e-05, + "loss": 7.5075, + "step": 264000 + }, + { + "epoch": 1.01, + "learning_rate": 3.323088003327197e-05, + "loss": 7.4919, + "step": 264500 + }, + { + "epoch": 1.01, + "learning_rate": 3.319918037359951e-05, + "loss": 7.4871, + "step": 265000 + }, + { + "epoch": 1.01, + "learning_rate": 3.3167480713927055e-05, + "loss": 7.488, + "step": 265500 + }, + { + "epoch": 1.01, + "learning_rate": 3.3135781054254605e-05, + "loss": 7.505, + "step": 266000 + }, + { + "epoch": 1.01, + "learning_rate": 3.310408139458215e-05, + "loss": 7.5001, + "step": 266500 + }, + { + "epoch": 1.02, + "learning_rate": 3.30723817349097e-05, + "loss": 7.4784, + "step": 267000 + }, + { + "epoch": 1.02, + "learning_rate": 3.304068207523724e-05, + "loss": 7.4899, + "step": 267500 + }, + { + "epoch": 1.02, + "learning_rate": 3.3008982415564785e-05, + "loss": 7.4622, + "step": 268000 + }, + { + "epoch": 1.02, + "learning_rate": 3.2977282755892335e-05, + "loss": 7.4945, + "step": 268500 + }, + { + "epoch": 1.02, + "learning_rate": 3.294558309621988e-05, + "loss": 7.4948, + "step": 269000 + }, + { + "epoch": 1.03, + "learning_rate": 3.291388343654743e-05, + "loss": 7.4886, + "step": 269500 + }, + { + "epoch": 1.03, + "learning_rate": 3.288218377687497e-05, + "loss": 7.4796, + "step": 270000 + }, + { + "epoch": 1.03, + "learning_rate": 3.2850484117202515e-05, + "loss": 7.4996, + "step": 270500 + }, + { + "epoch": 1.03, + "learning_rate": 3.2818784457530065e-05, + "loss": 7.4927, + "step": 271000 + }, + { + "epoch": 1.03, + "learning_rate": 3.278708479785761e-05, + "loss": 7.4866, + "step": 271500 + }, + { + "epoch": 1.03, + "learning_rate": 3.275538513818516e-05, + "loss": 7.5126, + "step": 272000 + }, + { + "epoch": 1.04, + "learning_rate": 3.272368547851271e-05, + "loss": 7.4901, + "step": 272500 + }, + { + "epoch": 1.04, + "learning_rate": 3.2691985818840245e-05, + "loss": 7.4772, + "step": 273000 + }, + { + "epoch": 1.04, + "learning_rate": 3.2660286159167795e-05, + "loss": 7.4876, + "step": 273500 + }, + { + "epoch": 1.04, + "learning_rate": 3.2628586499495346e-05, + "loss": 7.4819, + "step": 274000 + }, + { + "epoch": 1.04, + "learning_rate": 3.259688683982289e-05, + "loss": 7.4863, + "step": 274500 + }, + { + "epoch": 1.05, + "learning_rate": 3.256518718015044e-05, + "loss": 7.4794, + "step": 275000 + }, + { + "epoch": 1.05, + "learning_rate": 3.253348752047798e-05, + "loss": 7.4968, + "step": 275500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2501787860805526e-05, + "loss": 7.4921, + "step": 276000 + }, + { + "epoch": 1.05, + "learning_rate": 3.2470088201133076e-05, + "loss": 7.4844, + "step": 276500 + }, + { + "epoch": 1.05, + "learning_rate": 3.243838854146062e-05, + "loss": 7.5079, + "step": 277000 + }, + { + "epoch": 1.06, + "learning_rate": 3.240668888178817e-05, + "loss": 7.4975, + "step": 277500 + }, + { + "epoch": 1.06, + "learning_rate": 3.237498922211571e-05, + "loss": 7.4969, + "step": 278000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2343289562443256e-05, + "loss": 7.5057, + "step": 278500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2311589902770806e-05, + "loss": 7.4997, + "step": 279000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2279890243098356e-05, + "loss": 7.4847, + "step": 279500 + }, + { + "epoch": 1.07, + "learning_rate": 3.22481905834259e-05, + "loss": 7.4911, + "step": 280000 + }, + { + "epoch": 1.07, + "learning_rate": 3.221649092375344e-05, + "loss": 7.5051, + "step": 280500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2184791264080986e-05, + "loss": 7.4742, + "step": 281000 + }, + { + "epoch": 1.07, + "learning_rate": 3.2153091604408536e-05, + "loss": 7.4969, + "step": 281500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2121391944736086e-05, + "loss": 7.4884, + "step": 282000 + }, + { + "epoch": 1.07, + "learning_rate": 3.208969228506363e-05, + "loss": 7.4978, + "step": 282500 + }, + { + "epoch": 1.08, + "learning_rate": 3.205799262539117e-05, + "loss": 7.4849, + "step": 283000 + }, + { + "epoch": 1.08, + "learning_rate": 3.202629296571872e-05, + "loss": 7.4939, + "step": 283500 + }, + { + "epoch": 1.08, + "learning_rate": 3.1994593306046266e-05, + "loss": 7.4986, + "step": 284000 + }, + { + "epoch": 1.08, + "learning_rate": 3.1962893646373816e-05, + "loss": 7.4876, + "step": 284500 + }, + { + "epoch": 1.08, + "learning_rate": 3.193119398670136e-05, + "loss": 7.4895, + "step": 285000 + }, + { + "epoch": 1.09, + "learning_rate": 3.18994943270289e-05, + "loss": 7.49, + "step": 285500 + }, + { + "epoch": 1.09, + "learning_rate": 3.186779466735645e-05, + "loss": 7.4858, + "step": 286000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1836095007683996e-05, + "loss": 7.492, + "step": 286500 + }, + { + "epoch": 1.09, + "learning_rate": 3.1804395348011546e-05, + "loss": 7.4854, + "step": 287000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1772695688339096e-05, + "loss": 7.4993, + "step": 287500 + }, + { + "epoch": 1.1, + "learning_rate": 3.174099602866663e-05, + "loss": 7.4795, + "step": 288000 + }, + { + "epoch": 1.1, + "learning_rate": 3.170929636899418e-05, + "loss": 7.4866, + "step": 288500 + }, + { + "epoch": 1.1, + "learning_rate": 3.167759670932173e-05, + "loss": 7.4897, + "step": 289000 + }, + { + "epoch": 1.1, + "learning_rate": 3.1645897049649276e-05, + "loss": 7.4926, + "step": 289500 + }, + { + "epoch": 1.1, + "learning_rate": 3.1614197389976827e-05, + "loss": 7.4848, + "step": 290000 + }, + { + "epoch": 1.11, + "learning_rate": 3.158249773030437e-05, + "loss": 7.4971, + "step": 290500 + }, + { + "epoch": 1.11, + "learning_rate": 3.155079807063191e-05, + "loss": 7.4849, + "step": 291000 + }, + { + "epoch": 1.11, + "learning_rate": 3.151909841095946e-05, + "loss": 7.4962, + "step": 291500 + }, + { + "epoch": 1.11, + "learning_rate": 3.1487398751287007e-05, + "loss": 7.4818, + "step": 292000 + }, + { + "epoch": 1.11, + "learning_rate": 3.145569909161456e-05, + "loss": 7.4792, + "step": 292500 + }, + { + "epoch": 1.11, + "learning_rate": 3.14239994319421e-05, + "loss": 7.4821, + "step": 293000 + }, + { + "epoch": 1.12, + "learning_rate": 3.139229977226964e-05, + "loss": 7.4866, + "step": 293500 + }, + { + "epoch": 1.12, + "learning_rate": 3.1360600112597193e-05, + "loss": 7.4831, + "step": 294000 + }, + { + "epoch": 1.12, + "learning_rate": 3.132890045292474e-05, + "loss": 7.481, + "step": 294500 + }, + { + "epoch": 1.12, + "learning_rate": 3.129720079325229e-05, + "loss": 7.495, + "step": 295000 + }, + { + "epoch": 1.12, + "learning_rate": 3.126550113357983e-05, + "loss": 7.4764, + "step": 295500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1233801473907373e-05, + "loss": 7.5023, + "step": 296000 + }, + { + "epoch": 1.13, + "learning_rate": 3.1202101814234924e-05, + "loss": 7.5015, + "step": 296500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1170402154562474e-05, + "loss": 7.4767, + "step": 297000 + }, + { + "epoch": 1.13, + "learning_rate": 3.113870249489002e-05, + "loss": 7.4763, + "step": 297500 + }, + { + "epoch": 1.13, + "learning_rate": 3.110700283521756e-05, + "loss": 7.4957, + "step": 298000 + }, + { + "epoch": 1.14, + "learning_rate": 3.1075303175545104e-05, + "loss": 7.5016, + "step": 298500 + }, + { + "epoch": 1.14, + "learning_rate": 3.1043603515872654e-05, + "loss": 7.4787, + "step": 299000 + }, + { + "epoch": 1.14, + "learning_rate": 3.1011903856200204e-05, + "loss": 7.484, + "step": 299500 + }, + { + "epoch": 1.14, + "learning_rate": 3.098020419652775e-05, + "loss": 7.4945, + "step": 300000 + }, + { + "epoch": 1.14, + "learning_rate": 3.09485045368553e-05, + "loss": 7.5016, + "step": 300500 + }, + { + "epoch": 1.14, + "learning_rate": 3.091680487718284e-05, + "loss": 7.4848, + "step": 301000 + }, + { + "epoch": 1.15, + "learning_rate": 3.0885105217510384e-05, + "loss": 7.4909, + "step": 301500 + }, + { + "epoch": 1.15, + "learning_rate": 3.0853405557837934e-05, + "loss": 7.4784, + "step": 302000 + }, + { + "epoch": 1.15, + "learning_rate": 3.082170589816548e-05, + "loss": 7.4951, + "step": 302500 + }, + { + "epoch": 1.15, + "learning_rate": 3.079000623849303e-05, + "loss": 7.4843, + "step": 303000 + }, + { + "epoch": 1.15, + "learning_rate": 3.075830657882057e-05, + "loss": 7.5043, + "step": 303500 + }, + { + "epoch": 1.16, + "learning_rate": 3.0726606919148114e-05, + "loss": 7.4835, + "step": 304000 + }, + { + "epoch": 1.16, + "learning_rate": 3.0694907259475664e-05, + "loss": 7.4967, + "step": 304500 + }, + { + "epoch": 1.16, + "learning_rate": 3.0663207599803214e-05, + "loss": 7.4946, + "step": 305000 + }, + { + "epoch": 1.16, + "learning_rate": 3.063150794013076e-05, + "loss": 7.4888, + "step": 305500 + }, + { + "epoch": 1.16, + "learning_rate": 3.05998082804583e-05, + "loss": 7.4794, + "step": 306000 + }, + { + "epoch": 1.17, + "learning_rate": 3.056810862078585e-05, + "loss": 7.4754, + "step": 306500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0536408961113394e-05, + "loss": 7.494, + "step": 307000 + }, + { + "epoch": 1.17, + "learning_rate": 3.050470930144094e-05, + "loss": 7.4847, + "step": 307500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0473009641768484e-05, + "loss": 7.4921, + "step": 308000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0441309982096034e-05, + "loss": 7.4799, + "step": 308500 + }, + { + "epoch": 1.18, + "learning_rate": 3.040961032242358e-05, + "loss": 7.5017, + "step": 309000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0377910662751124e-05, + "loss": 7.4774, + "step": 309500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0346211003078674e-05, + "loss": 7.4762, + "step": 310000 + }, + { + "epoch": 1.18, + "learning_rate": 3.031451134340622e-05, + "loss": 7.4793, + "step": 310500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0282811683733764e-05, + "loss": 7.5039, + "step": 311000 + }, + { + "epoch": 1.18, + "learning_rate": 3.025111202406131e-05, + "loss": 7.4839, + "step": 311500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0219412364388854e-05, + "loss": 7.484, + "step": 312000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0187712704716405e-05, + "loss": 7.4838, + "step": 312500 + }, + { + "epoch": 1.19, + "learning_rate": 3.015601304504395e-05, + "loss": 7.4876, + "step": 313000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0124313385371495e-05, + "loss": 7.4747, + "step": 313500 + }, + { + "epoch": 1.19, + "learning_rate": 3.009261372569904e-05, + "loss": 7.4817, + "step": 314000 + }, + { + "epoch": 1.2, + "learning_rate": 3.006091406602659e-05, + "loss": 7.4912, + "step": 314500 + }, + { + "epoch": 1.2, + "learning_rate": 3.0029214406354135e-05, + "loss": 7.4828, + "step": 315000 + }, + { + "epoch": 1.2, + "learning_rate": 2.999751474668168e-05, + "loss": 7.4845, + "step": 315500 + }, + { + "epoch": 1.2, + "learning_rate": 2.9965815087009225e-05, + "loss": 7.4827, + "step": 316000 + }, + { + "epoch": 1.2, + "learning_rate": 2.993411542733677e-05, + "loss": 7.4897, + "step": 316500 + }, + { + "epoch": 1.21, + "learning_rate": 2.990241576766432e-05, + "loss": 7.4898, + "step": 317000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9870716107991865e-05, + "loss": 7.4848, + "step": 317500 + }, + { + "epoch": 1.21, + "learning_rate": 2.983901644831941e-05, + "loss": 7.4849, + "step": 318000 + }, + { + "epoch": 1.21, + "learning_rate": 2.980731678864696e-05, + "loss": 7.484, + "step": 318500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9775617128974505e-05, + "loss": 7.4866, + "step": 319000 + }, + { + "epoch": 1.22, + "learning_rate": 2.974391746930205e-05, + "loss": 7.4855, + "step": 319500 + }, + { + "epoch": 1.22, + "learning_rate": 2.97122178096296e-05, + "loss": 7.483, + "step": 320000 + }, + { + "epoch": 1.22, + "learning_rate": 2.968051814995714e-05, + "loss": 7.4946, + "step": 320500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9648818490284692e-05, + "loss": 7.4954, + "step": 321000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9617118830612235e-05, + "loss": 7.4927, + "step": 321500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9585419170939782e-05, + "loss": 7.4893, + "step": 322000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9553719511267332e-05, + "loss": 7.4781, + "step": 322500 + }, + { + "epoch": 1.23, + "learning_rate": 2.9522019851594872e-05, + "loss": 7.4867, + "step": 323000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9490320191922422e-05, + "loss": 7.489, + "step": 323500 + }, + { + "epoch": 1.23, + "learning_rate": 2.945862053224997e-05, + "loss": 7.4847, + "step": 324000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9426920872577512e-05, + "loss": 7.4949, + "step": 324500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9395221212905062e-05, + "loss": 7.4837, + "step": 325000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9363521553232602e-05, + "loss": 7.4915, + "step": 325500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9331821893560152e-05, + "loss": 7.5058, + "step": 326000 + }, + { + "epoch": 1.24, + "learning_rate": 2.93001222338877e-05, + "loss": 7.4839, + "step": 326500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9268422574215242e-05, + "loss": 7.5023, + "step": 327000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9236722914542792e-05, + "loss": 7.4928, + "step": 327500 + }, + { + "epoch": 1.25, + "learning_rate": 2.920502325487034e-05, + "loss": 7.4934, + "step": 328000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9173323595197882e-05, + "loss": 7.4933, + "step": 328500 + }, + { + "epoch": 1.25, + "learning_rate": 2.9141623935525432e-05, + "loss": 7.4961, + "step": 329000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9109924275852972e-05, + "loss": 7.4945, + "step": 329500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9078224616180522e-05, + "loss": 7.4747, + "step": 330000 + }, + { + "epoch": 1.26, + "learning_rate": 2.904652495650807e-05, + "loss": 7.4819, + "step": 330500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9014825296835612e-05, + "loss": 7.4906, + "step": 331000 + }, + { + "epoch": 1.26, + "learning_rate": 2.8983125637163162e-05, + "loss": 7.4994, + "step": 331500 + }, + { + "epoch": 1.26, + "learning_rate": 2.895142597749071e-05, + "loss": 7.4856, + "step": 332000 + }, + { + "epoch": 1.26, + "learning_rate": 2.8919726317818252e-05, + "loss": 7.4877, + "step": 332500 + }, + { + "epoch": 1.27, + "learning_rate": 2.88880266581458e-05, + "loss": 7.487, + "step": 333000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8856326998473342e-05, + "loss": 7.4821, + "step": 333500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8824627338800892e-05, + "loss": 7.4815, + "step": 334000 + }, + { + "epoch": 1.27, + "learning_rate": 2.879292767912844e-05, + "loss": 7.4797, + "step": 334500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8761228019455983e-05, + "loss": 7.4878, + "step": 335000 + }, + { + "epoch": 1.28, + "learning_rate": 2.872952835978353e-05, + "loss": 7.4823, + "step": 335500 + }, + { + "epoch": 1.28, + "learning_rate": 2.869782870011108e-05, + "loss": 7.4816, + "step": 336000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8666129040438623e-05, + "loss": 7.4992, + "step": 336500 + }, + { + "epoch": 1.28, + "learning_rate": 2.863442938076617e-05, + "loss": 7.4795, + "step": 337000 + }, + { + "epoch": 1.28, + "learning_rate": 2.860272972109372e-05, + "loss": 7.4896, + "step": 337500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8571030061421263e-05, + "loss": 7.4846, + "step": 338000 + }, + { + "epoch": 1.29, + "learning_rate": 2.853933040174881e-05, + "loss": 7.4808, + "step": 338500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8507630742076353e-05, + "loss": 7.4826, + "step": 339000 + }, + { + "epoch": 1.29, + "learning_rate": 2.84759310824039e-05, + "loss": 7.4996, + "step": 339500 + }, + { + "epoch": 1.29, + "learning_rate": 2.844423142273145e-05, + "loss": 7.492, + "step": 340000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8412531763058993e-05, + "loss": 7.4846, + "step": 340500 + }, + { + "epoch": 1.3, + "learning_rate": 2.838083210338654e-05, + "loss": 7.4898, + "step": 341000 + }, + { + "epoch": 1.3, + "learning_rate": 2.834913244371409e-05, + "loss": 7.5043, + "step": 341500 + }, + { + "epoch": 1.3, + "learning_rate": 2.831743278404163e-05, + "loss": 7.4931, + "step": 342000 + }, + { + "epoch": 1.3, + "learning_rate": 2.828573312436918e-05, + "loss": 7.4835, + "step": 342500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8254033464696723e-05, + "loss": 7.4702, + "step": 343000 + }, + { + "epoch": 1.31, + "learning_rate": 2.822233380502427e-05, + "loss": 7.4893, + "step": 343500 + }, + { + "epoch": 1.31, + "learning_rate": 2.819063414535182e-05, + "loss": 7.4891, + "step": 344000 + }, + { + "epoch": 1.31, + "learning_rate": 2.815893448567936e-05, + "loss": 7.4892, + "step": 344500 + }, + { + "epoch": 1.31, + "learning_rate": 2.812723482600691e-05, + "loss": 7.493, + "step": 345000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8095535166334457e-05, + "loss": 7.4759, + "step": 345500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8063835506662e-05, + "loss": 7.4783, + "step": 346000 + }, + { + "epoch": 1.32, + "learning_rate": 2.803213584698955e-05, + "loss": 7.4879, + "step": 346500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8000436187317093e-05, + "loss": 7.4805, + "step": 347000 + }, + { + "epoch": 1.32, + "learning_rate": 2.796873652764464e-05, + "loss": 7.4743, + "step": 347500 + }, + { + "epoch": 1.32, + "learning_rate": 2.7937036867972187e-05, + "loss": 7.4829, + "step": 348000 + }, + { + "epoch": 1.33, + "learning_rate": 2.790533720829973e-05, + "loss": 7.4825, + "step": 348500 + }, + { + "epoch": 1.33, + "learning_rate": 2.787363754862728e-05, + "loss": 7.4946, + "step": 349000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7841937888954827e-05, + "loss": 7.4916, + "step": 349500 + }, + { + "epoch": 1.33, + "learning_rate": 2.781023822928237e-05, + "loss": 7.4735, + "step": 350000 + }, + { + "epoch": 1.33, + "learning_rate": 2.777853856960992e-05, + "loss": 7.4845, + "step": 350500 + }, + { + "epoch": 1.34, + "learning_rate": 2.774683890993746e-05, + "loss": 7.4936, + "step": 351000 + }, + { + "epoch": 1.34, + "learning_rate": 2.771513925026501e-05, + "loss": 7.4954, + "step": 351500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7683439590592557e-05, + "loss": 7.4748, + "step": 352000 + }, + { + "epoch": 1.34, + "learning_rate": 2.76517399309201e-05, + "loss": 7.5012, + "step": 352500 + }, + { + "epoch": 1.34, + "learning_rate": 2.762004027124765e-05, + "loss": 7.4788, + "step": 353000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7588340611575197e-05, + "loss": 7.487, + "step": 353500 + }, + { + "epoch": 1.35, + "learning_rate": 2.755664095190274e-05, + "loss": 7.4891, + "step": 354000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7524941292230287e-05, + "loss": 7.4856, + "step": 354500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7493241632557837e-05, + "loss": 7.4819, + "step": 355000 + }, + { + "epoch": 1.35, + "learning_rate": 2.746154197288538e-05, + "loss": 7.4803, + "step": 355500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7429842313212927e-05, + "loss": 7.4736, + "step": 356000 + }, + { + "epoch": 1.36, + "learning_rate": 2.739814265354047e-05, + "loss": 7.4798, + "step": 356500 + }, + { + "epoch": 1.36, + "learning_rate": 2.736644299386802e-05, + "loss": 7.5073, + "step": 357000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7334743334195567e-05, + "loss": 7.4815, + "step": 357500 + }, + { + "epoch": 1.36, + "learning_rate": 2.730304367452311e-05, + "loss": 7.4827, + "step": 358000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7271344014850657e-05, + "loss": 7.4744, + "step": 358500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7239644355178207e-05, + "loss": 7.4977, + "step": 359000 + }, + { + "epoch": 1.37, + "learning_rate": 2.720794469550575e-05, + "loss": 7.4811, + "step": 359500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7176245035833297e-05, + "loss": 7.4886, + "step": 360000 + }, + { + "epoch": 1.37, + "learning_rate": 2.714454537616084e-05, + "loss": 7.4825, + "step": 360500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7112845716488387e-05, + "loss": 7.4851, + "step": 361000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7081146056815938e-05, + "loss": 7.5017, + "step": 361500 + }, + { + "epoch": 1.38, + "learning_rate": 2.704944639714348e-05, + "loss": 7.4827, + "step": 362000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7017746737471028e-05, + "loss": 7.4915, + "step": 362500 + }, + { + "epoch": 1.38, + "learning_rate": 2.6986047077798578e-05, + "loss": 7.501, + "step": 363000 + }, + { + "epoch": 1.38, + "learning_rate": 2.6954347418126118e-05, + "loss": 7.4834, + "step": 363500 + }, + { + "epoch": 1.38, + "learning_rate": 2.6922647758453668e-05, + "loss": 7.484, + "step": 364000 + }, + { + "epoch": 1.39, + "learning_rate": 2.689094809878121e-05, + "loss": 7.4934, + "step": 364500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6859248439108758e-05, + "loss": 7.4851, + "step": 365000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6827548779436308e-05, + "loss": 7.4833, + "step": 365500 + }, + { + "epoch": 1.39, + "learning_rate": 2.679584911976385e-05, + "loss": 7.4964, + "step": 366000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6764149460091398e-05, + "loss": 7.4807, + "step": 366500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6732449800418944e-05, + "loss": 7.4819, + "step": 367000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6700750140746488e-05, + "loss": 7.4797, + "step": 367500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6669050481074038e-05, + "loss": 7.4831, + "step": 368000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6637350821401585e-05, + "loss": 7.4914, + "step": 368500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6605651161729128e-05, + "loss": 7.4967, + "step": 369000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6573951502056678e-05, + "loss": 7.4916, + "step": 369500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6542251842384218e-05, + "loss": 7.4784, + "step": 370000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6510552182711768e-05, + "loss": 7.507, + "step": 370500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6478852523039315e-05, + "loss": 7.4964, + "step": 371000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6447152863366858e-05, + "loss": 7.4925, + "step": 371500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6415453203694408e-05, + "loss": 7.4782, + "step": 372000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6383753544021955e-05, + "loss": 7.4683, + "step": 372500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6352053884349498e-05, + "loss": 7.4785, + "step": 373000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6320354224677045e-05, + "loss": 7.4876, + "step": 373500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6288654565004588e-05, + "loss": 7.4889, + "step": 374000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6256954905332138e-05, + "loss": 7.4773, + "step": 374500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6225255245659685e-05, + "loss": 7.4971, + "step": 375000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6193555585987228e-05, + "loss": 7.4915, + "step": 375500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6161855926314775e-05, + "loss": 7.5012, + "step": 376000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6130156266642325e-05, + "loss": 7.488, + "step": 376500 + }, + { + "epoch": 1.43, + "learning_rate": 2.609845660696987e-05, + "loss": 7.5013, + "step": 377000 + }, + { + "epoch": 1.44, + "learning_rate": 2.6066756947297415e-05, + "loss": 7.4978, + "step": 377500 + }, + { + "epoch": 1.44, + "learning_rate": 2.603505728762496e-05, + "loss": 7.489, + "step": 378000 + }, + { + "epoch": 1.44, + "learning_rate": 2.600335762795251e-05, + "loss": 7.4857, + "step": 378500 + }, + { + "epoch": 1.44, + "learning_rate": 2.5971657968280055e-05, + "loss": 7.4906, + "step": 379000 + }, + { + "epoch": 1.44, + "learning_rate": 2.59399583086076e-05, + "loss": 7.486, + "step": 379500 + }, + { + "epoch": 1.45, + "learning_rate": 2.5908258648935145e-05, + "loss": 7.4792, + "step": 380000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5876558989262695e-05, + "loss": 7.4805, + "step": 380500 + }, + { + "epoch": 1.45, + "learning_rate": 2.584485932959024e-05, + "loss": 7.4858, + "step": 381000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5813159669917785e-05, + "loss": 7.5006, + "step": 381500 + }, + { + "epoch": 1.45, + "learning_rate": 2.578146001024533e-05, + "loss": 7.4816, + "step": 382000 + }, + { + "epoch": 1.46, + "learning_rate": 2.5749760350572875e-05, + "loss": 7.4938, + "step": 382500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5718060690900425e-05, + "loss": 7.4812, + "step": 383000 + }, + { + "epoch": 1.46, + "learning_rate": 2.568636103122797e-05, + "loss": 7.4793, + "step": 383500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5654661371555515e-05, + "loss": 7.4912, + "step": 384000 + }, + { + "epoch": 1.46, + "learning_rate": 2.5622961711883066e-05, + "loss": 7.4782, + "step": 384500 + }, + { + "epoch": 1.46, + "learning_rate": 2.559126205221061e-05, + "loss": 7.4864, + "step": 385000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5559562392538156e-05, + "loss": 7.4904, + "step": 385500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5527862732865702e-05, + "loss": 7.4791, + "step": 386000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5496163073193246e-05, + "loss": 7.4856, + "step": 386500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5464463413520796e-05, + "loss": 7.4887, + "step": 387000 + }, + { + "epoch": 1.47, + "learning_rate": 2.543276375384834e-05, + "loss": 7.4914, + "step": 387500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5401064094175886e-05, + "loss": 7.4958, + "step": 388000 + }, + { + "epoch": 1.48, + "learning_rate": 2.5369364434503436e-05, + "loss": 7.4776, + "step": 388500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5337664774830976e-05, + "loss": 7.4802, + "step": 389000 + }, + { + "epoch": 1.48, + "learning_rate": 2.5305965115158526e-05, + "loss": 7.4843, + "step": 389500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5274265455486073e-05, + "loss": 7.4739, + "step": 390000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5242565795813616e-05, + "loss": 7.4993, + "step": 390500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5210866136141166e-05, + "loss": 7.4867, + "step": 391000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5179166476468706e-05, + "loss": 7.5017, + "step": 391500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5147466816796256e-05, + "loss": 7.4876, + "step": 392000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5115767157123803e-05, + "loss": 7.5033, + "step": 392500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5084067497451346e-05, + "loss": 7.4735, + "step": 393000 + }, + { + "epoch": 1.5, + "learning_rate": 2.5052367837778896e-05, + "loss": 7.4946, + "step": 393500 + }, + { + "epoch": 1.5, + "learning_rate": 2.5020668178106443e-05, + "loss": 7.4833, + "step": 394000 + }, + { + "epoch": 1.5, + "learning_rate": 2.4988968518433986e-05, + "loss": 7.4954, + "step": 394500 + }, + { + "epoch": 1.5, + "learning_rate": 2.4957268858761533e-05, + "loss": 7.4927, + "step": 395000 + }, + { + "epoch": 1.5, + "learning_rate": 2.492556919908908e-05, + "loss": 7.4963, + "step": 395500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4893869539416626e-05, + "loss": 7.4996, + "step": 396000 + }, + { + "epoch": 1.51, + "learning_rate": 2.486216987974417e-05, + "loss": 7.4848, + "step": 396500 + }, + { + "epoch": 1.51, + "learning_rate": 2.483047022007172e-05, + "loss": 7.4834, + "step": 397000 + }, + { + "epoch": 1.51, + "learning_rate": 2.4798770560399266e-05, + "loss": 7.4839, + "step": 397500 + }, + { + "epoch": 1.51, + "learning_rate": 2.476707090072681e-05, + "loss": 7.5004, + "step": 398000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4735371241054356e-05, + "loss": 7.4833, + "step": 398500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4703671581381903e-05, + "loss": 7.4885, + "step": 399000 + }, + { + "epoch": 1.52, + "learning_rate": 2.467197192170945e-05, + "loss": 7.4587, + "step": 399500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4640272262036996e-05, + "loss": 7.4901, + "step": 400000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4608572602364543e-05, + "loss": 7.487, + "step": 400500 + }, + { + "epoch": 1.53, + "learning_rate": 2.457687294269209e-05, + "loss": 7.5021, + "step": 401000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4545173283019633e-05, + "loss": 7.4917, + "step": 401500 + }, + { + "epoch": 1.53, + "learning_rate": 2.451347362334718e-05, + "loss": 7.4905, + "step": 402000 + }, + { + "epoch": 1.53, + "learning_rate": 2.448177396367473e-05, + "loss": 7.4921, + "step": 402500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4450074304002273e-05, + "loss": 7.4727, + "step": 403000 + }, + { + "epoch": 1.53, + "learning_rate": 2.441837464432982e-05, + "loss": 7.4932, + "step": 403500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4386674984657363e-05, + "loss": 7.4857, + "step": 404000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4354975324984913e-05, + "loss": 7.4678, + "step": 404500 + }, + { + "epoch": 1.54, + "learning_rate": 2.432327566531246e-05, + "loss": 7.5037, + "step": 405000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4291576005640003e-05, + "loss": 7.4715, + "step": 405500 + }, + { + "epoch": 1.54, + "learning_rate": 2.425987634596755e-05, + "loss": 7.4757, + "step": 406000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4228176686295097e-05, + "loss": 7.4853, + "step": 406500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4196477026622644e-05, + "loss": 7.4942, + "step": 407000 + }, + { + "epoch": 1.55, + "learning_rate": 2.416477736695019e-05, + "loss": 7.4944, + "step": 407500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4133077707277734e-05, + "loss": 7.4825, + "step": 408000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4101378047605284e-05, + "loss": 7.4988, + "step": 408500 + }, + { + "epoch": 1.56, + "learning_rate": 2.4069678387932827e-05, + "loss": 7.4786, + "step": 409000 + }, + { + "epoch": 1.56, + "learning_rate": 2.4037978728260374e-05, + "loss": 7.4794, + "step": 409500 + }, + { + "epoch": 1.56, + "learning_rate": 2.400627906858792e-05, + "loss": 7.4878, + "step": 410000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3974579408915467e-05, + "loss": 7.4908, + "step": 410500 + }, + { + "epoch": 1.56, + "learning_rate": 2.3942879749243014e-05, + "loss": 7.4775, + "step": 411000 + }, + { + "epoch": 1.57, + "learning_rate": 2.391118008957056e-05, + "loss": 7.4931, + "step": 411500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3879480429898104e-05, + "loss": 7.479, + "step": 412000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3847780770225654e-05, + "loss": 7.4955, + "step": 412500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3816081110553197e-05, + "loss": 7.4721, + "step": 413000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3784381450880744e-05, + "loss": 7.4964, + "step": 413500 + }, + { + "epoch": 1.57, + "learning_rate": 2.375268179120829e-05, + "loss": 7.4841, + "step": 414000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3720982131535837e-05, + "loss": 7.4801, + "step": 414500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3689282471863384e-05, + "loss": 7.4837, + "step": 415000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3657582812190927e-05, + "loss": 7.4862, + "step": 415500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3625883152518477e-05, + "loss": 7.4814, + "step": 416000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3594183492846024e-05, + "loss": 7.4863, + "step": 416500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3562483833173567e-05, + "loss": 7.4836, + "step": 417000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3530784173501114e-05, + "loss": 7.4956, + "step": 417500 + }, + { + "epoch": 1.59, + "learning_rate": 2.349908451382866e-05, + "loss": 7.4953, + "step": 418000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3467384854156208e-05, + "loss": 7.4966, + "step": 418500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3435685194483754e-05, + "loss": 7.4903, + "step": 419000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3403985534811298e-05, + "loss": 7.491, + "step": 419500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3372285875138848e-05, + "loss": 7.4829, + "step": 420000 + }, + { + "epoch": 1.6, + "learning_rate": 2.334058621546639e-05, + "loss": 7.4785, + "step": 420500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3308886555793938e-05, + "loss": 7.4789, + "step": 421000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3277186896121484e-05, + "loss": 7.4857, + "step": 421500 + }, + { + "epoch": 1.61, + "learning_rate": 2.324548723644903e-05, + "loss": 7.4795, + "step": 422000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3213787576776578e-05, + "loss": 7.4787, + "step": 422500 + }, + { + "epoch": 1.61, + "learning_rate": 2.318208791710412e-05, + "loss": 7.4901, + "step": 423000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3150388257431668e-05, + "loss": 7.4851, + "step": 423500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3118688597759218e-05, + "loss": 7.4885, + "step": 424000 + }, + { + "epoch": 1.61, + "learning_rate": 2.308698893808676e-05, + "loss": 7.4778, + "step": 424500 + }, + { + "epoch": 1.62, + "learning_rate": 2.3055289278414308e-05, + "loss": 7.4719, + "step": 425000 + }, + { + "epoch": 1.62, + "learning_rate": 2.3023589618741855e-05, + "loss": 7.4956, + "step": 425500 + }, + { + "epoch": 1.62, + "learning_rate": 2.29918899590694e-05, + "loss": 7.4809, + "step": 426000 + }, + { + "epoch": 1.62, + "learning_rate": 2.2960190299396948e-05, + "loss": 7.4857, + "step": 426500 + }, + { + "epoch": 1.62, + "learning_rate": 2.292849063972449e-05, + "loss": 7.4774, + "step": 427000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2896790980052038e-05, + "loss": 7.479, + "step": 427500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2865091320379585e-05, + "loss": 7.4812, + "step": 428000 + }, + { + "epoch": 1.63, + "learning_rate": 2.283339166070713e-05, + "loss": 7.4783, + "step": 428500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2801692001034678e-05, + "loss": 7.4832, + "step": 429000 + }, + { + "epoch": 1.63, + "learning_rate": 2.276999234136222e-05, + "loss": 7.4875, + "step": 429500 + }, + { + "epoch": 1.64, + "learning_rate": 2.273829268168977e-05, + "loss": 7.4899, + "step": 430000 + }, + { + "epoch": 1.64, + "learning_rate": 2.270659302201732e-05, + "loss": 7.4836, + "step": 430500 + }, + { + "epoch": 1.64, + "learning_rate": 2.267489336234486e-05, + "loss": 7.4894, + "step": 431000 + }, + { + "epoch": 1.64, + "learning_rate": 2.2643193702672412e-05, + "loss": 7.4748, + "step": 431500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2611494042999955e-05, + "loss": 7.4979, + "step": 432000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2579794383327502e-05, + "loss": 7.509, + "step": 432500 + }, + { + "epoch": 1.65, + "learning_rate": 2.254809472365505e-05, + "loss": 7.4842, + "step": 433000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2516395063982595e-05, + "loss": 7.4776, + "step": 433500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2484695404310142e-05, + "loss": 7.488, + "step": 434000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2452995744637685e-05, + "loss": 7.5027, + "step": 434500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2421296084965232e-05, + "loss": 7.4735, + "step": 435000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2389596425292782e-05, + "loss": 7.4812, + "step": 435500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2357896765620325e-05, + "loss": 7.5071, + "step": 436000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2326197105947872e-05, + "loss": 7.4867, + "step": 436500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2294497446275415e-05, + "loss": 7.4881, + "step": 437000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2262797786602965e-05, + "loss": 7.4917, + "step": 437500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2231098126930512e-05, + "loss": 7.4892, + "step": 438000 + }, + { + "epoch": 1.67, + "learning_rate": 2.2199398467258055e-05, + "loss": 7.4777, + "step": 438500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2167698807585602e-05, + "loss": 7.5001, + "step": 439000 + }, + { + "epoch": 1.67, + "learning_rate": 2.213599914791315e-05, + "loss": 7.4948, + "step": 439500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2104299488240696e-05, + "loss": 7.4712, + "step": 440000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2072599828568242e-05, + "loss": 7.4989, + "step": 440500 + }, + { + "epoch": 1.68, + "learning_rate": 2.2040900168895786e-05, + "loss": 7.4885, + "step": 441000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2009200509223336e-05, + "loss": 7.4871, + "step": 441500 + }, + { + "epoch": 1.68, + "learning_rate": 2.197750084955088e-05, + "loss": 7.4881, + "step": 442000 + }, + { + "epoch": 1.68, + "learning_rate": 2.1945801189878426e-05, + "loss": 7.4896, + "step": 442500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1914101530205972e-05, + "loss": 7.4904, + "step": 443000 + }, + { + "epoch": 1.69, + "learning_rate": 2.188240187053352e-05, + "loss": 7.4898, + "step": 443500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1850702210861066e-05, + "loss": 7.4957, + "step": 444000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1819002551188612e-05, + "loss": 7.4865, + "step": 444500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1787302891516156e-05, + "loss": 7.4915, + "step": 445000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1755603231843706e-05, + "loss": 7.497, + "step": 445500 + }, + { + "epoch": 1.7, + "learning_rate": 2.172390357217125e-05, + "loss": 7.4874, + "step": 446000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1692203912498796e-05, + "loss": 7.4772, + "step": 446500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1660504252826343e-05, + "loss": 7.4974, + "step": 447000 + }, + { + "epoch": 1.7, + "learning_rate": 2.162880459315389e-05, + "loss": 7.4917, + "step": 447500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1597104933481436e-05, + "loss": 7.4971, + "step": 448000 + }, + { + "epoch": 1.71, + "learning_rate": 2.156540527380898e-05, + "loss": 7.4751, + "step": 448500 + }, + { + "epoch": 1.71, + "learning_rate": 2.153370561413653e-05, + "loss": 7.4918, + "step": 449000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1502005954464076e-05, + "loss": 7.4861, + "step": 449500 + }, + { + "epoch": 1.71, + "learning_rate": 2.147030629479162e-05, + "loss": 7.4963, + "step": 450000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1438606635119166e-05, + "loss": 7.4781, + "step": 450500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1406906975446713e-05, + "loss": 7.4913, + "step": 451000 + }, + { + "epoch": 1.72, + "learning_rate": 2.137520731577426e-05, + "loss": 7.4899, + "step": 451500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1343507656101806e-05, + "loss": 7.4869, + "step": 452000 + }, + { + "epoch": 1.72, + "learning_rate": 2.131180799642935e-05, + "loss": 7.4908, + "step": 452500 + }, + { + "epoch": 1.72, + "learning_rate": 2.12801083367569e-05, + "loss": 7.4979, + "step": 453000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1248408677084443e-05, + "loss": 7.4752, + "step": 453500 + }, + { + "epoch": 1.73, + "learning_rate": 2.121670901741199e-05, + "loss": 7.4942, + "step": 454000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1185009357739536e-05, + "loss": 7.4794, + "step": 454500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1153309698067083e-05, + "loss": 7.5003, + "step": 455000 + }, + { + "epoch": 1.73, + "learning_rate": 2.112161003839463e-05, + "loss": 7.4842, + "step": 455500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1089910378722173e-05, + "loss": 7.49, + "step": 456000 + }, + { + "epoch": 1.74, + "learning_rate": 2.105821071904972e-05, + "loss": 7.4955, + "step": 456500 + }, + { + "epoch": 1.74, + "learning_rate": 2.102651105937727e-05, + "loss": 7.4922, + "step": 457000 + }, + { + "epoch": 1.74, + "learning_rate": 2.0994811399704813e-05, + "loss": 7.4992, + "step": 457500 + }, + { + "epoch": 1.74, + "learning_rate": 2.096311174003236e-05, + "loss": 7.4878, + "step": 458000 + }, + { + "epoch": 1.74, + "learning_rate": 2.0931412080359907e-05, + "loss": 7.4955, + "step": 458500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0899712420687453e-05, + "loss": 7.4969, + "step": 459000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0868012761015e-05, + "loss": 7.5065, + "step": 459500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0836313101342543e-05, + "loss": 7.4951, + "step": 460000 + }, + { + "epoch": 1.75, + "learning_rate": 2.080461344167009e-05, + "loss": 7.485, + "step": 460500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0772913781997637e-05, + "loss": 7.4797, + "step": 461000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0741214122325183e-05, + "loss": 7.495, + "step": 461500 + }, + { + "epoch": 1.76, + "learning_rate": 2.070951446265273e-05, + "loss": 7.4678, + "step": 462000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0677814802980274e-05, + "loss": 7.5105, + "step": 462500 + }, + { + "epoch": 1.76, + "learning_rate": 2.0646115143307824e-05, + "loss": 7.4975, + "step": 463000 + }, + { + "epoch": 1.76, + "learning_rate": 2.061441548363537e-05, + "loss": 7.483, + "step": 463500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0582715823962914e-05, + "loss": 7.4876, + "step": 464000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0551016164290464e-05, + "loss": 7.5046, + "step": 464500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0519316504618007e-05, + "loss": 7.4868, + "step": 465000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0487616844945554e-05, + "loss": 7.4981, + "step": 465500 + }, + { + "epoch": 1.77, + "learning_rate": 2.04559171852731e-05, + "loss": 7.4838, + "step": 466000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0424217525600647e-05, + "loss": 7.4986, + "step": 466500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0392517865928194e-05, + "loss": 7.4767, + "step": 467000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0360818206255737e-05, + "loss": 7.4845, + "step": 467500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0329118546583284e-05, + "loss": 7.4986, + "step": 468000 + }, + { + "epoch": 1.78, + "learning_rate": 2.029741888691083e-05, + "loss": 7.4984, + "step": 468500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0265719227238377e-05, + "loss": 7.4851, + "step": 469000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0234019567565924e-05, + "loss": 7.4876, + "step": 469500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0202319907893467e-05, + "loss": 7.4906, + "step": 470000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0170620248221017e-05, + "loss": 7.4916, + "step": 470500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0138920588548564e-05, + "loss": 7.4783, + "step": 471000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0107220928876107e-05, + "loss": 7.492, + "step": 471500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0075521269203654e-05, + "loss": 7.4747, + "step": 472000 + }, + { + "epoch": 1.8, + "learning_rate": 2.00438216095312e-05, + "loss": 7.4935, + "step": 472500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0012121949858748e-05, + "loss": 7.4952, + "step": 473000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9980422290186294e-05, + "loss": 7.4861, + "step": 473500 + }, + { + "epoch": 1.8, + "learning_rate": 1.9948722630513838e-05, + "loss": 7.4757, + "step": 474000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9917022970841388e-05, + "loss": 7.4784, + "step": 474500 + }, + { + "epoch": 1.81, + "learning_rate": 1.988532331116893e-05, + "loss": 7.5014, + "step": 475000 + }, + { + "epoch": 1.81, + "learning_rate": 1.9853623651496478e-05, + "loss": 7.4779, + "step": 475500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9821923991824024e-05, + "loss": 7.4747, + "step": 476000 + }, + { + "epoch": 1.81, + "learning_rate": 1.979022433215157e-05, + "loss": 7.4924, + "step": 476500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9758524672479118e-05, + "loss": 7.4931, + "step": 477000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9726825012806664e-05, + "loss": 7.5003, + "step": 477500 + }, + { + "epoch": 1.82, + "learning_rate": 1.9695125353134208e-05, + "loss": 7.4917, + "step": 478000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9663425693461758e-05, + "loss": 7.485, + "step": 478500 + }, + { + "epoch": 1.82, + "learning_rate": 1.96317260337893e-05, + "loss": 7.4869, + "step": 479000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9600026374116848e-05, + "loss": 7.5071, + "step": 479500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9568326714444395e-05, + "loss": 7.4769, + "step": 480000 + }, + { + "epoch": 1.83, + "learning_rate": 1.953662705477194e-05, + "loss": 7.4922, + "step": 480500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9504927395099488e-05, + "loss": 7.4746, + "step": 481000 + }, + { + "epoch": 1.83, + "learning_rate": 1.947322773542703e-05, + "loss": 7.4716, + "step": 481500 + }, + { + "epoch": 1.83, + "learning_rate": 1.944152807575458e-05, + "loss": 7.4843, + "step": 482000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9409828416082128e-05, + "loss": 7.4858, + "step": 482500 + }, + { + "epoch": 1.84, + "learning_rate": 1.937812875640967e-05, + "loss": 7.4732, + "step": 483000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9346429096737218e-05, + "loss": 7.4873, + "step": 483500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9314729437064765e-05, + "loss": 7.4883, + "step": 484000 + }, + { + "epoch": 1.84, + "learning_rate": 1.928302977739231e-05, + "loss": 7.4897, + "step": 484500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9251330117719858e-05, + "loss": 7.4871, + "step": 485000 + }, + { + "epoch": 1.85, + "learning_rate": 1.92196304580474e-05, + "loss": 7.4895, + "step": 485500 + }, + { + "epoch": 1.85, + "learning_rate": 1.918793079837495e-05, + "loss": 7.5044, + "step": 486000 + }, + { + "epoch": 1.85, + "learning_rate": 1.9156231138702495e-05, + "loss": 7.4946, + "step": 486500 + }, + { + "epoch": 1.85, + "learning_rate": 1.912453147903004e-05, + "loss": 7.4789, + "step": 487000 + }, + { + "epoch": 1.85, + "learning_rate": 1.909283181935759e-05, + "loss": 7.48, + "step": 487500 + }, + { + "epoch": 1.86, + "learning_rate": 1.9061132159685135e-05, + "loss": 7.4858, + "step": 488000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9029432500012682e-05, + "loss": 7.5018, + "step": 488500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8997732840340225e-05, + "loss": 7.4792, + "step": 489000 + }, + { + "epoch": 1.86, + "learning_rate": 1.8966033180667772e-05, + "loss": 7.4872, + "step": 489500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8934333520995322e-05, + "loss": 7.4953, + "step": 490000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8902633861322865e-05, + "loss": 7.4795, + "step": 490500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8870934201650412e-05, + "loss": 7.4935, + "step": 491000 + }, + { + "epoch": 1.87, + "learning_rate": 1.883923454197796e-05, + "loss": 7.4772, + "step": 491500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8807534882305505e-05, + "loss": 7.4874, + "step": 492000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8775835222633052e-05, + "loss": 7.4835, + "step": 492500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8744135562960595e-05, + "loss": 7.4861, + "step": 493000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8712435903288142e-05, + "loss": 7.49, + "step": 493500 + }, + { + "epoch": 1.88, + "learning_rate": 1.868073624361569e-05, + "loss": 7.4842, + "step": 494000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8649036583943235e-05, + "loss": 7.4898, + "step": 494500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8617336924270782e-05, + "loss": 7.5066, + "step": 495000 + }, + { + "epoch": 1.88, + "learning_rate": 1.858563726459833e-05, + "loss": 7.5039, + "step": 495500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8553937604925876e-05, + "loss": 7.4901, + "step": 496000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8522237945253422e-05, + "loss": 7.4683, + "step": 496500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8490538285580966e-05, + "loss": 7.4971, + "step": 497000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8458838625908516e-05, + "loss": 7.4787, + "step": 497500 + }, + { + "epoch": 1.89, + "learning_rate": 1.842713896623606e-05, + "loss": 7.4902, + "step": 498000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8395439306563606e-05, + "loss": 7.4826, + "step": 498500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8363739646891152e-05, + "loss": 7.4757, + "step": 499000 + }, + { + "epoch": 1.9, + "learning_rate": 1.83320399872187e-05, + "loss": 7.4912, + "step": 499500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8300340327546246e-05, + "loss": 7.4803, + "step": 500000 + }, + { + "epoch": 1.9, + "learning_rate": 1.826864066787379e-05, + "loss": 7.4893, + "step": 500500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8236941008201336e-05, + "loss": 7.4932, + "step": 501000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8205241348528883e-05, + "loss": 7.4942, + "step": 501500 + }, + { + "epoch": 1.91, + "learning_rate": 1.817354168885643e-05, + "loss": 7.4846, + "step": 502000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8141842029183976e-05, + "loss": 7.501, + "step": 502500 + }, + { + "epoch": 1.91, + "learning_rate": 1.811014236951152e-05, + "loss": 7.4974, + "step": 503000 + }, + { + "epoch": 1.92, + "learning_rate": 1.807844270983907e-05, + "loss": 7.485, + "step": 503500 + }, + { + "epoch": 1.92, + "learning_rate": 1.8046743050166616e-05, + "loss": 7.4922, + "step": 504000 + }, + { + "epoch": 1.92, + "learning_rate": 1.801504339049416e-05, + "loss": 7.4862, + "step": 504500 + }, + { + "epoch": 1.92, + "learning_rate": 1.7983343730821706e-05, + "loss": 7.4751, + "step": 505000 + }, + { + "epoch": 1.92, + "learning_rate": 1.7951644071149253e-05, + "loss": 7.4911, + "step": 505500 + }, + { + "epoch": 1.92, + "learning_rate": 1.79199444114768e-05, + "loss": 7.4885, + "step": 506000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7888244751804346e-05, + "loss": 7.4996, + "step": 506500 + }, + { + "epoch": 1.93, + "learning_rate": 1.785654509213189e-05, + "loss": 7.4842, + "step": 507000 + }, + { + "epoch": 1.93, + "learning_rate": 1.782484543245944e-05, + "loss": 7.482, + "step": 507500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7793145772786983e-05, + "loss": 7.5013, + "step": 508000 + }, + { + "epoch": 1.93, + "learning_rate": 1.776144611311453e-05, + "loss": 7.4943, + "step": 508500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7729746453442076e-05, + "loss": 7.4914, + "step": 509000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7698046793769623e-05, + "loss": 7.4987, + "step": 509500 + }, + { + "epoch": 1.94, + "learning_rate": 1.766634713409717e-05, + "loss": 7.4847, + "step": 510000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7634647474424716e-05, + "loss": 7.4991, + "step": 510500 + }, + { + "epoch": 1.94, + "learning_rate": 1.760294781475226e-05, + "loss": 7.5086, + "step": 511000 + }, + { + "epoch": 1.95, + "learning_rate": 1.757124815507981e-05, + "loss": 7.4931, + "step": 511500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7539548495407353e-05, + "loss": 7.4879, + "step": 512000 + }, + { + "epoch": 1.95, + "learning_rate": 1.75078488357349e-05, + "loss": 7.4858, + "step": 512500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7476149176062447e-05, + "loss": 7.4899, + "step": 513000 + }, + { + "epoch": 1.95, + "learning_rate": 1.7444449516389993e-05, + "loss": 7.4803, + "step": 513500 + }, + { + "epoch": 1.96, + "learning_rate": 1.741274985671754e-05, + "loss": 7.4752, + "step": 514000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7381050197045083e-05, + "loss": 7.4754, + "step": 514500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7349350537372633e-05, + "loss": 7.4866, + "step": 515000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7317650877700177e-05, + "loss": 7.4782, + "step": 515500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7285951218027723e-05, + "loss": 7.4994, + "step": 516000 + }, + { + "epoch": 1.96, + "learning_rate": 1.725425155835527e-05, + "loss": 7.4918, + "step": 516500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7222551898682817e-05, + "loss": 7.4669, + "step": 517000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7190852239010364e-05, + "loss": 7.4877, + "step": 517500 + }, + { + "epoch": 1.97, + "learning_rate": 1.715915257933791e-05, + "loss": 7.4764, + "step": 518000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7127452919665454e-05, + "loss": 7.4781, + "step": 518500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7095753259993004e-05, + "loss": 7.486, + "step": 519000 + }, + { + "epoch": 1.98, + "learning_rate": 1.7064053600320547e-05, + "loss": 7.4951, + "step": 519500 + }, + { + "epoch": 1.98, + "learning_rate": 1.7032353940648094e-05, + "loss": 7.4638, + "step": 520000 + }, + { + "epoch": 1.98, + "learning_rate": 1.700065428097564e-05, + "loss": 7.4869, + "step": 520500 + }, + { + "epoch": 1.98, + "learning_rate": 1.6968954621303187e-05, + "loss": 7.4846, + "step": 521000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6937254961630734e-05, + "loss": 7.4765, + "step": 521500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6905555301958277e-05, + "loss": 7.5108, + "step": 522000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6873855642285824e-05, + "loss": 7.4799, + "step": 522500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6842155982613374e-05, + "loss": 7.4881, + "step": 523000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6810456322940917e-05, + "loss": 7.4677, + "step": 523500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6778756663268464e-05, + "loss": 7.4847, + "step": 524000 + }, + { + "epoch": 2.0, + "learning_rate": 1.674705700359601e-05, + "loss": 7.4958, + "step": 524500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6715357343923557e-05, + "loss": 7.4829, + "step": 525000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6683657684251104e-05, + "loss": 7.4905, + "step": 525500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6651958024578647e-05, + "loss": 7.4893, + "step": 526000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6620258364906194e-05, + "loss": 7.4913, + "step": 526500 + }, + { + "epoch": 2.0, + "learning_rate": 1.658855870523374e-05, + "loss": 7.495, + "step": 527000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6556859045561287e-05, + "loss": 7.4846, + "step": 527500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6525159385888834e-05, + "loss": 7.4968, + "step": 528000 + }, + { + "epoch": 2.01, + "learning_rate": 1.649345972621638e-05, + "loss": 7.4936, + "step": 528500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6461760066543928e-05, + "loss": 7.488, + "step": 529000 + }, + { + "epoch": 2.01, + "learning_rate": 1.643006040687147e-05, + "loss": 7.4935, + "step": 529500 + }, + { + "epoch": 2.02, + "learning_rate": 1.6398360747199018e-05, + "loss": 7.485, + "step": 530000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6366661087526568e-05, + "loss": 7.4838, + "step": 530500 + }, + { + "epoch": 2.02, + "learning_rate": 1.633496142785411e-05, + "loss": 7.5033, + "step": 531000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6303261768181658e-05, + "loss": 7.5058, + "step": 531500 + }, + { + "epoch": 2.02, + "learning_rate": 1.6271562108509204e-05, + "loss": 7.4735, + "step": 532000 + }, + { + "epoch": 2.03, + "learning_rate": 1.623986244883675e-05, + "loss": 7.5094, + "step": 532500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6208162789164298e-05, + "loss": 7.468, + "step": 533000 + }, + { + "epoch": 2.03, + "learning_rate": 1.617646312949184e-05, + "loss": 7.4916, + "step": 533500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6144763469819388e-05, + "loss": 7.4981, + "step": 534000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6113063810146935e-05, + "loss": 7.4882, + "step": 534500 + }, + { + "epoch": 2.04, + "learning_rate": 1.608136415047448e-05, + "loss": 7.488, + "step": 535000 + }, + { + "epoch": 2.04, + "learning_rate": 1.6049664490802028e-05, + "loss": 7.4897, + "step": 535500 + }, + { + "epoch": 2.04, + "learning_rate": 1.601796483112957e-05, + "loss": 7.4763, + "step": 536000 + }, + { + "epoch": 2.04, + "learning_rate": 1.598626517145712e-05, + "loss": 7.4969, + "step": 536500 + }, + { + "epoch": 2.04, + "learning_rate": 1.5954565511784668e-05, + "loss": 7.4741, + "step": 537000 + }, + { + "epoch": 2.04, + "learning_rate": 1.592286585211221e-05, + "loss": 7.4865, + "step": 537500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5891166192439758e-05, + "loss": 7.4717, + "step": 538000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5859466532767305e-05, + "loss": 7.496, + "step": 538500 + }, + { + "epoch": 2.05, + "learning_rate": 1.582776687309485e-05, + "loss": 7.4946, + "step": 539000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5796067213422398e-05, + "loss": 7.4919, + "step": 539500 + }, + { + "epoch": 2.05, + "learning_rate": 1.576436755374994e-05, + "loss": 7.4883, + "step": 540000 + }, + { + "epoch": 2.06, + "learning_rate": 1.573266789407749e-05, + "loss": 7.4776, + "step": 540500 + }, + { + "epoch": 2.06, + "learning_rate": 1.5700968234405035e-05, + "loss": 7.4971, + "step": 541000 + }, + { + "epoch": 2.06, + "learning_rate": 1.566926857473258e-05, + "loss": 7.4961, + "step": 541500 + }, + { + "epoch": 2.06, + "learning_rate": 1.563756891506013e-05, + "loss": 7.4906, + "step": 542000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5605869255387675e-05, + "loss": 7.5022, + "step": 542500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5574169595715222e-05, + "loss": 7.4833, + "step": 543000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5542469936042765e-05, + "loss": 7.4884, + "step": 543500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5510770276370315e-05, + "loss": 7.4796, + "step": 544000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5479070616697862e-05, + "loss": 7.4927, + "step": 544500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5447370957025405e-05, + "loss": 7.478, + "step": 545000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5415671297352952e-05, + "loss": 7.4795, + "step": 545500 + }, + { + "epoch": 2.08, + "learning_rate": 1.53839716376805e-05, + "loss": 7.4672, + "step": 546000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5352271978008045e-05, + "loss": 7.4848, + "step": 546500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5320572318335592e-05, + "loss": 7.4975, + "step": 547000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5288872658663135e-05, + "loss": 7.486, + "step": 547500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5257172998990685e-05, + "loss": 7.5042, + "step": 548000 + }, + { + "epoch": 2.09, + "learning_rate": 1.522547333931823e-05, + "loss": 7.4824, + "step": 548500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5193773679645775e-05, + "loss": 7.4905, + "step": 549000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5162074019973322e-05, + "loss": 7.4875, + "step": 549500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5130374360300869e-05, + "loss": 7.4709, + "step": 550000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5098674700628416e-05, + "loss": 7.484, + "step": 550500 + }, + { + "epoch": 2.1, + "learning_rate": 1.506697504095596e-05, + "loss": 7.4856, + "step": 551000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5035275381283506e-05, + "loss": 7.4796, + "step": 551500 + }, + { + "epoch": 2.1, + "learning_rate": 1.5003575721611054e-05, + "loss": 7.4841, + "step": 552000 + }, + { + "epoch": 2.1, + "learning_rate": 1.49718760619386e-05, + "loss": 7.4878, + "step": 552500 + }, + { + "epoch": 2.1, + "learning_rate": 1.4940176402266146e-05, + "loss": 7.5088, + "step": 553000 + }, + { + "epoch": 2.11, + "learning_rate": 1.490847674259369e-05, + "loss": 7.4853, + "step": 553500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4876777082921239e-05, + "loss": 7.4974, + "step": 554000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4845077423248784e-05, + "loss": 7.4835, + "step": 554500 + }, + { + "epoch": 2.11, + "learning_rate": 1.481337776357633e-05, + "loss": 7.4858, + "step": 555000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4781678103903876e-05, + "loss": 7.4657, + "step": 555500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4749978444231424e-05, + "loss": 7.487, + "step": 556000 + }, + { + "epoch": 2.12, + "learning_rate": 1.471827878455897e-05, + "loss": 7.4915, + "step": 556500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4686579124886516e-05, + "loss": 7.4858, + "step": 557000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4654879465214061e-05, + "loss": 7.4788, + "step": 557500 + }, + { + "epoch": 2.12, + "learning_rate": 1.462317980554161e-05, + "loss": 7.4807, + "step": 558000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4591480145869154e-05, + "loss": 7.4823, + "step": 558500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4559780486196701e-05, + "loss": 7.4924, + "step": 559000 + }, + { + "epoch": 2.13, + "learning_rate": 1.4528080826524248e-05, + "loss": 7.4928, + "step": 559500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4496381166851794e-05, + "loss": 7.4887, + "step": 560000 + }, + { + "epoch": 2.13, + "learning_rate": 1.446468150717934e-05, + "loss": 7.487, + "step": 560500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4432981847506884e-05, + "loss": 7.4855, + "step": 561000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4401282187834433e-05, + "loss": 7.4952, + "step": 561500 + }, + { + "epoch": 2.14, + "learning_rate": 1.436958252816198e-05, + "loss": 7.494, + "step": 562000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4337882868489525e-05, + "loss": 7.4929, + "step": 562500 + }, + { + "epoch": 2.14, + "learning_rate": 1.430618320881707e-05, + "loss": 7.4806, + "step": 563000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4274483549144618e-05, + "loss": 7.4835, + "step": 563500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4242783889472163e-05, + "loss": 7.4992, + "step": 564000 + }, + { + "epoch": 2.15, + "learning_rate": 1.421108422979971e-05, + "loss": 7.4782, + "step": 564500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4179384570127255e-05, + "loss": 7.4948, + "step": 565000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4147684910454803e-05, + "loss": 7.4824, + "step": 565500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4115985250782348e-05, + "loss": 7.4814, + "step": 566000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4084285591109895e-05, + "loss": 7.4857, + "step": 566500 + }, + { + "epoch": 2.16, + "learning_rate": 1.405258593143744e-05, + "loss": 7.4909, + "step": 567000 + }, + { + "epoch": 2.16, + "learning_rate": 1.4020886271764988e-05, + "loss": 7.4852, + "step": 567500 + }, + { + "epoch": 2.16, + "learning_rate": 1.3989186612092533e-05, + "loss": 7.4942, + "step": 568000 + }, + { + "epoch": 2.16, + "learning_rate": 1.3957486952420078e-05, + "loss": 7.4776, + "step": 568500 + }, + { + "epoch": 2.16, + "learning_rate": 1.3925787292747625e-05, + "loss": 7.4865, + "step": 569000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3894087633075173e-05, + "loss": 7.479, + "step": 569500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3862387973402718e-05, + "loss": 7.4894, + "step": 570000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3830688313730263e-05, + "loss": 7.4859, + "step": 570500 + }, + { + "epoch": 2.17, + "learning_rate": 1.379898865405781e-05, + "loss": 7.5113, + "step": 571000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3767288994385358e-05, + "loss": 7.4906, + "step": 571500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3735589334712903e-05, + "loss": 7.4839, + "step": 572000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3703889675040448e-05, + "loss": 7.4801, + "step": 572500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3672190015367995e-05, + "loss": 7.4786, + "step": 573000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3640490355695542e-05, + "loss": 7.4568, + "step": 573500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3608790696023089e-05, + "loss": 7.4822, + "step": 574000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3577091036350634e-05, + "loss": 7.5005, + "step": 574500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3545391376678179e-05, + "loss": 7.4834, + "step": 575000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3513691717005727e-05, + "loss": 7.4912, + "step": 575500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3481992057333274e-05, + "loss": 7.489, + "step": 576000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3450292397660819e-05, + "loss": 7.4785, + "step": 576500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3418592737988367e-05, + "loss": 7.4709, + "step": 577000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3386893078315912e-05, + "loss": 7.5052, + "step": 577500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3355193418643457e-05, + "loss": 7.4848, + "step": 578000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3323493758971004e-05, + "loss": 7.4859, + "step": 578500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3291794099298552e-05, + "loss": 7.4929, + "step": 579000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3260094439626097e-05, + "loss": 7.4868, + "step": 579500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3228394779953642e-05, + "loss": 7.4788, + "step": 580000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3196695120281189e-05, + "loss": 7.4951, + "step": 580500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3164995460608737e-05, + "loss": 7.4951, + "step": 581000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3133295800936282e-05, + "loss": 7.4861, + "step": 581500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3101596141263827e-05, + "loss": 7.4999, + "step": 582000 + }, + { + "epoch": 2.22, + "learning_rate": 1.3069896481591372e-05, + "loss": 7.4914, + "step": 582500 + }, + { + "epoch": 2.22, + "learning_rate": 1.303819682191892e-05, + "loss": 7.49, + "step": 583000 + }, + { + "epoch": 2.22, + "learning_rate": 1.3006497162246468e-05, + "loss": 7.4875, + "step": 583500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2974797502574013e-05, + "loss": 7.5048, + "step": 584000 + }, + { + "epoch": 2.22, + "learning_rate": 1.2943097842901558e-05, + "loss": 7.4962, + "step": 584500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2911398183229106e-05, + "loss": 7.4774, + "step": 585000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2879698523556653e-05, + "loss": 7.4836, + "step": 585500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2847998863884198e-05, + "loss": 7.502, + "step": 586000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2816299204211743e-05, + "loss": 7.4826, + "step": 586500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2784599544539291e-05, + "loss": 7.4802, + "step": 587000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2752899884866836e-05, + "loss": 7.4735, + "step": 587500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2721200225194383e-05, + "loss": 7.4813, + "step": 588000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2689500565521928e-05, + "loss": 7.4912, + "step": 588500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2657800905849476e-05, + "loss": 7.483, + "step": 589000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2626101246177021e-05, + "loss": 7.4972, + "step": 589500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2594401586504568e-05, + "loss": 7.4872, + "step": 590000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2562701926832113e-05, + "loss": 7.4886, + "step": 590500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2531002267159661e-05, + "loss": 7.4957, + "step": 591000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2499302607487206e-05, + "loss": 7.495, + "step": 591500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2467602947814753e-05, + "loss": 7.472, + "step": 592000 + }, + { + "epoch": 2.25, + "learning_rate": 1.24359032881423e-05, + "loss": 7.485, + "step": 592500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2404203628469846e-05, + "loss": 7.4938, + "step": 593000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2372503968797391e-05, + "loss": 7.4972, + "step": 593500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2340804309124938e-05, + "loss": 7.485, + "step": 594000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2309104649452483e-05, + "loss": 7.4855, + "step": 594500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2277404989780032e-05, + "loss": 7.488, + "step": 595000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2245705330107577e-05, + "loss": 7.4869, + "step": 595500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2214005670435123e-05, + "loss": 7.4852, + "step": 596000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2182306010762668e-05, + "loss": 7.4858, + "step": 596500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2150606351090215e-05, + "loss": 7.4779, + "step": 597000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2118906691417762e-05, + "loss": 7.4977, + "step": 597500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2087207031745308e-05, + "loss": 7.4837, + "step": 598000 + }, + { + "epoch": 2.28, + "learning_rate": 1.2055507372072853e-05, + "loss": 7.4878, + "step": 598500 + }, + { + "epoch": 2.28, + "learning_rate": 1.20238077124004e-05, + "loss": 7.5044, + "step": 599000 + }, + { + "epoch": 2.28, + "learning_rate": 1.1992108052727947e-05, + "loss": 7.4806, + "step": 599500 + }, + { + "epoch": 2.28, + "learning_rate": 1.1960408393055494e-05, + "loss": 7.4817, + "step": 600000 + }, + { + "epoch": 2.28, + "learning_rate": 1.1928708733383039e-05, + "loss": 7.4771, + "step": 600500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1897009073710585e-05, + "loss": 7.4823, + "step": 601000 + }, + { + "epoch": 2.29, + "learning_rate": 1.186530941403813e-05, + "loss": 7.4791, + "step": 601500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1833609754365679e-05, + "loss": 7.5043, + "step": 602000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1801910094693224e-05, + "loss": 7.493, + "step": 602500 + }, + { + "epoch": 2.29, + "learning_rate": 1.177021043502077e-05, + "loss": 7.4888, + "step": 603000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1738510775348315e-05, + "loss": 7.4986, + "step": 603500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1706811115675862e-05, + "loss": 7.4765, + "step": 604000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1675111456003409e-05, + "loss": 7.4994, + "step": 604500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1643411796330955e-05, + "loss": 7.4915, + "step": 605000 + }, + { + "epoch": 2.3, + "learning_rate": 1.16117121366585e-05, + "loss": 7.4944, + "step": 605500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1580012476986047e-05, + "loss": 7.4793, + "step": 606000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1548312817313594e-05, + "loss": 7.4996, + "step": 606500 + }, + { + "epoch": 2.31, + "learning_rate": 1.151661315764114e-05, + "loss": 7.4913, + "step": 607000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1484913497968687e-05, + "loss": 7.4759, + "step": 607500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1453213838296232e-05, + "loss": 7.4876, + "step": 608000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1421514178623779e-05, + "loss": 7.4641, + "step": 608500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1389814518951326e-05, + "loss": 7.4921, + "step": 609000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1358114859278872e-05, + "loss": 7.4813, + "step": 609500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1326415199606417e-05, + "loss": 7.5045, + "step": 610000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1294715539933964e-05, + "loss": 7.4985, + "step": 610500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1263015880261509e-05, + "loss": 7.4815, + "step": 611000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1231316220589058e-05, + "loss": 7.4762, + "step": 611500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1199616560916603e-05, + "loss": 7.4714, + "step": 612000 + }, + { + "epoch": 2.33, + "learning_rate": 1.116791690124415e-05, + "loss": 7.5055, + "step": 612500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1136217241571694e-05, + "loss": 7.5009, + "step": 613000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1104517581899241e-05, + "loss": 7.4863, + "step": 613500 + }, + { + "epoch": 2.34, + "learning_rate": 1.1072817922226788e-05, + "loss": 7.4795, + "step": 614000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1041118262554334e-05, + "loss": 7.4836, + "step": 614500 + }, + { + "epoch": 2.34, + "learning_rate": 1.100941860288188e-05, + "loss": 7.5069, + "step": 615000 + }, + { + "epoch": 2.34, + "learning_rate": 1.0977718943209426e-05, + "loss": 7.495, + "step": 615500 + }, + { + "epoch": 2.34, + "learning_rate": 1.0946019283536973e-05, + "loss": 7.4857, + "step": 616000 + }, + { + "epoch": 2.35, + "learning_rate": 1.091431962386452e-05, + "loss": 7.4908, + "step": 616500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0882619964192065e-05, + "loss": 7.482, + "step": 617000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0850920304519611e-05, + "loss": 7.4732, + "step": 617500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0819220644847156e-05, + "loss": 7.4855, + "step": 618000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0787520985174705e-05, + "loss": 7.4918, + "step": 618500 + }, + { + "epoch": 2.35, + "learning_rate": 1.075582132550225e-05, + "loss": 7.4965, + "step": 619000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0724121665829796e-05, + "loss": 7.4942, + "step": 619500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0692422006157341e-05, + "loss": 7.4977, + "step": 620000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0660722346484888e-05, + "loss": 7.4783, + "step": 620500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0629022686812435e-05, + "loss": 7.4989, + "step": 621000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0597323027139981e-05, + "loss": 7.4745, + "step": 621500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0565623367467526e-05, + "loss": 7.4794, + "step": 622000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0533923707795073e-05, + "loss": 7.4847, + "step": 622500 + }, + { + "epoch": 2.37, + "learning_rate": 1.050222404812262e-05, + "loss": 7.4982, + "step": 623000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0470524388450167e-05, + "loss": 7.503, + "step": 623500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0438824728777713e-05, + "loss": 7.4928, + "step": 624000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0407125069105258e-05, + "loss": 7.479, + "step": 624500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0375425409432805e-05, + "loss": 7.4936, + "step": 625000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0343725749760352e-05, + "loss": 7.4868, + "step": 625500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0312026090087898e-05, + "loss": 7.4826, + "step": 626000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0280326430415443e-05, + "loss": 7.4834, + "step": 626500 + }, + { + "epoch": 2.39, + "learning_rate": 1.024862677074299e-05, + "loss": 7.4945, + "step": 627000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0216927111070535e-05, + "loss": 7.4836, + "step": 627500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0185227451398084e-05, + "loss": 7.4862, + "step": 628000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0153527791725629e-05, + "loss": 7.4984, + "step": 628500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0121828132053175e-05, + "loss": 7.4911, + "step": 629000 + }, + { + "epoch": 2.39, + "learning_rate": 1.009012847238072e-05, + "loss": 7.4969, + "step": 629500 + }, + { + "epoch": 2.4, + "learning_rate": 1.0058428812708267e-05, + "loss": 7.482, + "step": 630000 + }, + { + "epoch": 2.4, + "learning_rate": 1.0026729153035814e-05, + "loss": 7.4814, + "step": 630500 + }, + { + "epoch": 2.4, + "learning_rate": 9.99502949336336e-06, + "loss": 7.498, + "step": 631000 + }, + { + "epoch": 2.4, + "learning_rate": 9.963329833690905e-06, + "loss": 7.4941, + "step": 631500 + }, + { + "epoch": 2.4, + "learning_rate": 9.931630174018452e-06, + "loss": 7.5052, + "step": 632000 + }, + { + "epoch": 2.41, + "learning_rate": 9.899930514345999e-06, + "loss": 7.4824, + "step": 632500 + }, + { + "epoch": 2.41, + "learning_rate": 9.868230854673545e-06, + "loss": 7.4937, + "step": 633000 + }, + { + "epoch": 2.41, + "learning_rate": 9.83653119500109e-06, + "loss": 7.5007, + "step": 633500 + }, + { + "epoch": 2.41, + "learning_rate": 9.804831535328637e-06, + "loss": 7.4825, + "step": 634000 + }, + { + "epoch": 2.41, + "learning_rate": 9.773131875656182e-06, + "loss": 7.4892, + "step": 634500 + }, + { + "epoch": 2.42, + "learning_rate": 9.74143221598373e-06, + "loss": 7.4957, + "step": 635000 + }, + { + "epoch": 2.42, + "learning_rate": 9.709732556311276e-06, + "loss": 7.4823, + "step": 635500 + }, + { + "epoch": 2.42, + "learning_rate": 9.678032896638822e-06, + "loss": 7.486, + "step": 636000 + }, + { + "epoch": 2.42, + "learning_rate": 9.646333236966367e-06, + "loss": 7.4864, + "step": 636500 + }, + { + "epoch": 2.42, + "learning_rate": 9.614633577293914e-06, + "loss": 7.4859, + "step": 637000 + }, + { + "epoch": 2.43, + "learning_rate": 9.58293391762146e-06, + "loss": 7.4927, + "step": 637500 + }, + { + "epoch": 2.43, + "learning_rate": 9.551234257949007e-06, + "loss": 7.4718, + "step": 638000 + }, + { + "epoch": 2.43, + "learning_rate": 9.519534598276552e-06, + "loss": 7.4904, + "step": 638500 + }, + { + "epoch": 2.43, + "learning_rate": 9.4878349386041e-06, + "loss": 7.4748, + "step": 639000 + }, + { + "epoch": 2.43, + "learning_rate": 9.456135278931646e-06, + "loss": 7.5012, + "step": 639500 + }, + { + "epoch": 2.43, + "learning_rate": 9.424435619259193e-06, + "loss": 7.4837, + "step": 640000 + }, + { + "epoch": 2.44, + "learning_rate": 9.39273595958674e-06, + "loss": 7.4662, + "step": 640500 + }, + { + "epoch": 2.44, + "learning_rate": 9.361036299914284e-06, + "loss": 7.4819, + "step": 641000 + }, + { + "epoch": 2.44, + "learning_rate": 9.329336640241831e-06, + "loss": 7.4823, + "step": 641500 + }, + { + "epoch": 2.44, + "learning_rate": 9.297636980569378e-06, + "loss": 7.4812, + "step": 642000 + }, + { + "epoch": 2.44, + "learning_rate": 9.265937320896924e-06, + "loss": 7.4908, + "step": 642500 + }, + { + "epoch": 2.45, + "learning_rate": 9.23423766122447e-06, + "loss": 7.487, + "step": 643000 + }, + { + "epoch": 2.45, + "learning_rate": 9.202538001552016e-06, + "loss": 7.4879, + "step": 643500 + }, + { + "epoch": 2.45, + "learning_rate": 9.170838341879561e-06, + "loss": 7.4821, + "step": 644000 + }, + { + "epoch": 2.45, + "learning_rate": 9.13913868220711e-06, + "loss": 7.5029, + "step": 644500 + }, + { + "epoch": 2.45, + "learning_rate": 9.107439022534655e-06, + "loss": 7.4918, + "step": 645000 + }, + { + "epoch": 2.46, + "learning_rate": 9.075739362862201e-06, + "loss": 7.4751, + "step": 645500 + }, + { + "epoch": 2.46, + "learning_rate": 9.044039703189746e-06, + "loss": 7.4986, + "step": 646000 + }, + { + "epoch": 2.46, + "learning_rate": 9.012340043517293e-06, + "loss": 7.495, + "step": 646500 + }, + { + "epoch": 2.46, + "learning_rate": 8.98064038384484e-06, + "loss": 7.4812, + "step": 647000 + }, + { + "epoch": 2.46, + "learning_rate": 8.948940724172386e-06, + "loss": 7.489, + "step": 647500 + }, + { + "epoch": 2.46, + "learning_rate": 8.917241064499931e-06, + "loss": 7.4895, + "step": 648000 + }, + { + "epoch": 2.47, + "learning_rate": 8.885541404827478e-06, + "loss": 7.4865, + "step": 648500 + }, + { + "epoch": 2.47, + "learning_rate": 8.853841745155025e-06, + "loss": 7.4895, + "step": 649000 + }, + { + "epoch": 2.47, + "learning_rate": 8.822142085482571e-06, + "loss": 7.4767, + "step": 649500 + }, + { + "epoch": 2.47, + "learning_rate": 8.790442425810116e-06, + "loss": 7.4838, + "step": 650000 + }, + { + "epoch": 2.47, + "learning_rate": 8.758742766137663e-06, + "loss": 7.4996, + "step": 650500 + }, + { + "epoch": 2.48, + "learning_rate": 8.727043106465208e-06, + "loss": 7.4857, + "step": 651000 + }, + { + "epoch": 2.48, + "learning_rate": 8.695343446792757e-06, + "loss": 7.4907, + "step": 651500 + }, + { + "epoch": 2.48, + "learning_rate": 8.663643787120302e-06, + "loss": 7.4794, + "step": 652000 + }, + { + "epoch": 2.48, + "learning_rate": 8.631944127447848e-06, + "loss": 7.4902, + "step": 652500 + }, + { + "epoch": 2.48, + "learning_rate": 8.600244467775393e-06, + "loss": 7.4896, + "step": 653000 + }, + { + "epoch": 2.49, + "learning_rate": 8.56854480810294e-06, + "loss": 7.482, + "step": 653500 + }, + { + "epoch": 2.49, + "learning_rate": 8.536845148430487e-06, + "loss": 7.4846, + "step": 654000 + }, + { + "epoch": 2.49, + "learning_rate": 8.505145488758033e-06, + "loss": 7.4953, + "step": 654500 + }, + { + "epoch": 2.49, + "learning_rate": 8.473445829085578e-06, + "loss": 7.4905, + "step": 655000 + }, + { + "epoch": 2.49, + "learning_rate": 8.441746169413125e-06, + "loss": 7.4971, + "step": 655500 + }, + { + "epoch": 2.5, + "learning_rate": 8.410046509740672e-06, + "loss": 7.4926, + "step": 656000 + }, + { + "epoch": 2.5, + "learning_rate": 8.378346850068219e-06, + "loss": 7.4864, + "step": 656500 + }, + { + "epoch": 2.5, + "learning_rate": 8.346647190395765e-06, + "loss": 7.4681, + "step": 657000 + }, + { + "epoch": 2.5, + "learning_rate": 8.31494753072331e-06, + "loss": 7.4832, + "step": 657500 + }, + { + "epoch": 2.5, + "learning_rate": 8.283247871050857e-06, + "loss": 7.4953, + "step": 658000 + }, + { + "epoch": 2.5, + "learning_rate": 8.251548211378404e-06, + "loss": 7.4854, + "step": 658500 + }, + { + "epoch": 2.51, + "learning_rate": 8.21984855170595e-06, + "loss": 7.4948, + "step": 659000 + }, + { + "epoch": 2.51, + "learning_rate": 8.188148892033495e-06, + "loss": 7.4983, + "step": 659500 + }, + { + "epoch": 2.51, + "learning_rate": 8.156449232361042e-06, + "loss": 7.4777, + "step": 660000 + }, + { + "epoch": 2.51, + "learning_rate": 8.124749572688587e-06, + "loss": 7.4798, + "step": 660500 + }, + { + "epoch": 2.51, + "learning_rate": 8.093049913016136e-06, + "loss": 7.4792, + "step": 661000 + }, + { + "epoch": 2.52, + "learning_rate": 8.06135025334368e-06, + "loss": 7.4759, + "step": 661500 + }, + { + "epoch": 2.52, + "learning_rate": 8.029650593671227e-06, + "loss": 7.4847, + "step": 662000 + }, + { + "epoch": 2.52, + "learning_rate": 7.997950933998772e-06, + "loss": 7.4837, + "step": 662500 + }, + { + "epoch": 2.52, + "learning_rate": 7.966251274326319e-06, + "loss": 7.4778, + "step": 663000 + }, + { + "epoch": 2.52, + "learning_rate": 7.934551614653866e-06, + "loss": 7.4882, + "step": 663500 + }, + { + "epoch": 2.53, + "learning_rate": 7.902851954981412e-06, + "loss": 7.4778, + "step": 664000 + }, + { + "epoch": 2.53, + "learning_rate": 7.871152295308957e-06, + "loss": 7.4866, + "step": 664500 + }, + { + "epoch": 2.53, + "learning_rate": 7.839452635636504e-06, + "loss": 7.4954, + "step": 665000 + }, + { + "epoch": 2.53, + "learning_rate": 7.80775297596405e-06, + "loss": 7.4884, + "step": 665500 + }, + { + "epoch": 2.53, + "learning_rate": 7.776053316291597e-06, + "loss": 7.4751, + "step": 666000 + }, + { + "epoch": 2.54, + "learning_rate": 7.744353656619142e-06, + "loss": 7.4712, + "step": 666500 + }, + { + "epoch": 2.54, + "learning_rate": 7.71265399694669e-06, + "loss": 7.4923, + "step": 667000 + }, + { + "epoch": 2.54, + "learning_rate": 7.680954337274234e-06, + "loss": 7.4964, + "step": 667500 + }, + { + "epoch": 2.54, + "learning_rate": 7.649254677601783e-06, + "loss": 7.499, + "step": 668000 + }, + { + "epoch": 2.54, + "learning_rate": 7.617555017929328e-06, + "loss": 7.4754, + "step": 668500 + }, + { + "epoch": 2.54, + "learning_rate": 7.585855358256874e-06, + "loss": 7.4836, + "step": 669000 + }, + { + "epoch": 2.55, + "learning_rate": 7.554155698584419e-06, + "loss": 7.488, + "step": 669500 + }, + { + "epoch": 2.55, + "learning_rate": 7.522456038911967e-06, + "loss": 7.4923, + "step": 670000 + }, + { + "epoch": 2.55, + "learning_rate": 7.490756379239512e-06, + "loss": 7.4793, + "step": 670500 + }, + { + "epoch": 2.55, + "learning_rate": 7.4590567195670594e-06, + "loss": 7.4939, + "step": 671000 + }, + { + "epoch": 2.55, + "learning_rate": 7.427357059894606e-06, + "loss": 7.487, + "step": 671500 + }, + { + "epoch": 2.56, + "learning_rate": 7.395657400222151e-06, + "loss": 7.4868, + "step": 672000 + }, + { + "epoch": 2.56, + "learning_rate": 7.363957740549699e-06, + "loss": 7.4842, + "step": 672500 + }, + { + "epoch": 2.56, + "learning_rate": 7.332258080877244e-06, + "loss": 7.4822, + "step": 673000 + }, + { + "epoch": 2.56, + "learning_rate": 7.300558421204791e-06, + "loss": 7.4852, + "step": 673500 + }, + { + "epoch": 2.56, + "learning_rate": 7.268858761532336e-06, + "loss": 7.4891, + "step": 674000 + }, + { + "epoch": 2.57, + "learning_rate": 7.237159101859883e-06, + "loss": 7.4841, + "step": 674500 + }, + { + "epoch": 2.57, + "learning_rate": 7.205459442187429e-06, + "loss": 7.4945, + "step": 675000 + }, + { + "epoch": 2.57, + "learning_rate": 7.1737597825149755e-06, + "loss": 7.4866, + "step": 675500 + }, + { + "epoch": 2.57, + "learning_rate": 7.142060122842521e-06, + "loss": 7.488, + "step": 676000 + }, + { + "epoch": 2.57, + "learning_rate": 7.110360463170068e-06, + "loss": 7.4688, + "step": 676500 + }, + { + "epoch": 2.58, + "learning_rate": 7.078660803497614e-06, + "loss": 7.4729, + "step": 677000 + }, + { + "epoch": 2.58, + "learning_rate": 7.046961143825161e-06, + "loss": 7.479, + "step": 677500 + }, + { + "epoch": 2.58, + "learning_rate": 7.0152614841527065e-06, + "loss": 7.4898, + "step": 678000 + }, + { + "epoch": 2.58, + "learning_rate": 6.983561824480253e-06, + "loss": 7.5005, + "step": 678500 + }, + { + "epoch": 2.58, + "learning_rate": 6.951862164807798e-06, + "loss": 7.4854, + "step": 679000 + }, + { + "epoch": 2.58, + "learning_rate": 6.920162505135346e-06, + "loss": 7.4771, + "step": 679500 + }, + { + "epoch": 2.59, + "learning_rate": 6.888462845462891e-06, + "loss": 7.4996, + "step": 680000 + }, + { + "epoch": 2.59, + "learning_rate": 6.856763185790438e-06, + "loss": 7.5023, + "step": 680500 + }, + { + "epoch": 2.59, + "learning_rate": 6.825063526117983e-06, + "loss": 7.4873, + "step": 681000 + }, + { + "epoch": 2.59, + "learning_rate": 6.79336386644553e-06, + "loss": 7.4781, + "step": 681500 + }, + { + "epoch": 2.59, + "learning_rate": 6.761664206773076e-06, + "loss": 7.4824, + "step": 682000 + }, + { + "epoch": 2.6, + "learning_rate": 6.729964547100623e-06, + "loss": 7.4792, + "step": 682500 + }, + { + "epoch": 2.6, + "learning_rate": 6.6982648874281685e-06, + "loss": 7.4833, + "step": 683000 + }, + { + "epoch": 2.6, + "learning_rate": 6.666565227755715e-06, + "loss": 7.4883, + "step": 683500 + }, + { + "epoch": 2.6, + "learning_rate": 6.634865568083261e-06, + "loss": 7.4876, + "step": 684000 + }, + { + "epoch": 2.6, + "learning_rate": 6.603165908410808e-06, + "loss": 7.4966, + "step": 684500 + }, + { + "epoch": 2.61, + "learning_rate": 6.571466248738354e-06, + "loss": 7.494, + "step": 685000 + }, + { + "epoch": 2.61, + "learning_rate": 6.5397665890659e-06, + "loss": 7.4769, + "step": 685500 + }, + { + "epoch": 2.61, + "learning_rate": 6.508066929393445e-06, + "loss": 7.4921, + "step": 686000 + }, + { + "epoch": 2.61, + "learning_rate": 6.476367269720993e-06, + "loss": 7.4946, + "step": 686500 + }, + { + "epoch": 2.61, + "learning_rate": 6.444667610048538e-06, + "loss": 7.4877, + "step": 687000 + }, + { + "epoch": 2.62, + "learning_rate": 6.4129679503760854e-06, + "loss": 7.4814, + "step": 687500 + }, + { + "epoch": 2.62, + "learning_rate": 6.381268290703632e-06, + "loss": 7.4903, + "step": 688000 + }, + { + "epoch": 2.62, + "learning_rate": 6.349568631031177e-06, + "loss": 7.4983, + "step": 688500 + }, + { + "epoch": 2.62, + "learning_rate": 6.317868971358725e-06, + "loss": 7.4839, + "step": 689000 + }, + { + "epoch": 2.62, + "learning_rate": 6.28616931168627e-06, + "loss": 7.5113, + "step": 689500 + }, + { + "epoch": 2.62, + "learning_rate": 6.254469652013817e-06, + "loss": 7.4889, + "step": 690000 + }, + { + "epoch": 2.63, + "learning_rate": 6.222769992341362e-06, + "loss": 7.4904, + "step": 690500 + }, + { + "epoch": 2.63, + "learning_rate": 6.191070332668909e-06, + "loss": 7.4978, + "step": 691000 + }, + { + "epoch": 2.63, + "learning_rate": 6.159370672996455e-06, + "loss": 7.4832, + "step": 691500 + }, + { + "epoch": 2.63, + "learning_rate": 6.127671013324001e-06, + "loss": 7.485, + "step": 692000 + }, + { + "epoch": 2.63, + "learning_rate": 6.095971353651547e-06, + "loss": 7.4716, + "step": 692500 + }, + { + "epoch": 2.64, + "learning_rate": 6.064271693979093e-06, + "loss": 7.5012, + "step": 693000 + }, + { + "epoch": 2.64, + "learning_rate": 6.03257203430664e-06, + "loss": 7.4798, + "step": 693500 + }, + { + "epoch": 2.64, + "learning_rate": 6.000872374634186e-06, + "loss": 7.4806, + "step": 694000 + }, + { + "epoch": 2.64, + "learning_rate": 5.9691727149617325e-06, + "loss": 7.486, + "step": 694500 + }, + { + "epoch": 2.64, + "learning_rate": 5.937473055289278e-06, + "loss": 7.4836, + "step": 695000 + }, + { + "epoch": 2.65, + "learning_rate": 5.905773395616825e-06, + "loss": 7.4876, + "step": 695500 + }, + { + "epoch": 2.65, + "learning_rate": 5.874073735944372e-06, + "loss": 7.468, + "step": 696000 + }, + { + "epoch": 2.65, + "learning_rate": 5.842374076271918e-06, + "loss": 7.4871, + "step": 696500 + }, + { + "epoch": 2.65, + "learning_rate": 5.810674416599464e-06, + "loss": 7.493, + "step": 697000 + }, + { + "epoch": 2.65, + "learning_rate": 5.77897475692701e-06, + "loss": 7.4734, + "step": 697500 + }, + { + "epoch": 2.66, + "learning_rate": 5.747275097254556e-06, + "loss": 7.4704, + "step": 698000 + }, + { + "epoch": 2.66, + "learning_rate": 5.715575437582103e-06, + "loss": 7.4867, + "step": 698500 + }, + { + "epoch": 2.66, + "learning_rate": 5.683875777909649e-06, + "loss": 7.4739, + "step": 699000 + }, + { + "epoch": 2.66, + "learning_rate": 5.652176118237195e-06, + "loss": 7.4763, + "step": 699500 + }, + { + "epoch": 2.66, + "learning_rate": 5.620476458564741e-06, + "loss": 7.483, + "step": 700000 + }, + { + "epoch": 2.66, + "learning_rate": 5.588776798892288e-06, + "loss": 7.4921, + "step": 700500 + }, + { + "epoch": 2.67, + "learning_rate": 5.557077139219834e-06, + "loss": 7.4833, + "step": 701000 + }, + { + "epoch": 2.67, + "learning_rate": 5.52537747954738e-06, + "loss": 7.497, + "step": 701500 + }, + { + "epoch": 2.67, + "learning_rate": 5.493677819874926e-06, + "loss": 7.4893, + "step": 702000 + }, + { + "epoch": 2.67, + "learning_rate": 5.461978160202472e-06, + "loss": 7.4757, + "step": 702500 + }, + { + "epoch": 2.67, + "learning_rate": 5.430278500530019e-06, + "loss": 7.4707, + "step": 703000 + }, + { + "epoch": 2.68, + "learning_rate": 5.398578840857565e-06, + "loss": 7.4749, + "step": 703500 + }, + { + "epoch": 2.68, + "learning_rate": 5.366879181185111e-06, + "loss": 7.4802, + "step": 704000 + }, + { + "epoch": 2.68, + "learning_rate": 5.335179521512657e-06, + "loss": 7.4991, + "step": 704500 + }, + { + "epoch": 2.68, + "learning_rate": 5.303479861840203e-06, + "loss": 7.4917, + "step": 705000 + }, + { + "epoch": 2.68, + "learning_rate": 5.27178020216775e-06, + "loss": 7.4783, + "step": 705500 + }, + { + "epoch": 2.69, + "learning_rate": 5.240080542495296e-06, + "loss": 7.4963, + "step": 706000 + }, + { + "epoch": 2.69, + "learning_rate": 5.208380882822842e-06, + "loss": 7.4929, + "step": 706500 + }, + { + "epoch": 2.69, + "learning_rate": 5.176681223150388e-06, + "loss": 7.4678, + "step": 707000 + }, + { + "epoch": 2.69, + "learning_rate": 5.144981563477935e-06, + "loss": 7.5026, + "step": 707500 + }, + { + "epoch": 2.69, + "learning_rate": 5.113281903805481e-06, + "loss": 7.4733, + "step": 708000 + }, + { + "epoch": 2.7, + "learning_rate": 5.081582244133027e-06, + "loss": 7.4956, + "step": 708500 + }, + { + "epoch": 2.7, + "learning_rate": 5.049882584460573e-06, + "loss": 7.4927, + "step": 709000 + }, + { + "epoch": 2.7, + "learning_rate": 5.018182924788119e-06, + "loss": 7.4927, + "step": 709500 + }, + { + "epoch": 2.7, + "learning_rate": 4.986483265115666e-06, + "loss": 7.4829, + "step": 710000 + }, + { + "epoch": 2.7, + "learning_rate": 4.954783605443212e-06, + "loss": 7.4863, + "step": 710500 + }, + { + "epoch": 2.7, + "learning_rate": 4.9230839457707585e-06, + "loss": 7.4888, + "step": 711000 + }, + { + "epoch": 2.71, + "learning_rate": 4.891384286098305e-06, + "loss": 7.4863, + "step": 711500 + }, + { + "epoch": 2.71, + "learning_rate": 4.859684626425851e-06, + "loss": 7.4832, + "step": 712000 + }, + { + "epoch": 2.71, + "learning_rate": 4.827984966753398e-06, + "loss": 7.4808, + "step": 712500 + }, + { + "epoch": 2.71, + "learning_rate": 4.796285307080944e-06, + "loss": 7.4834, + "step": 713000 + }, + { + "epoch": 2.71, + "learning_rate": 4.7645856474084895e-06, + "loss": 7.4914, + "step": 713500 + }, + { + "epoch": 2.72, + "learning_rate": 4.732885987736036e-06, + "loss": 7.4729, + "step": 714000 + }, + { + "epoch": 2.72, + "learning_rate": 4.701186328063582e-06, + "loss": 7.4726, + "step": 714500 + }, + { + "epoch": 2.72, + "learning_rate": 4.669486668391129e-06, + "loss": 7.4795, + "step": 715000 + }, + { + "epoch": 2.72, + "learning_rate": 4.637787008718675e-06, + "loss": 7.4856, + "step": 715500 + }, + { + "epoch": 2.72, + "learning_rate": 4.606087349046221e-06, + "loss": 7.4836, + "step": 716000 + }, + { + "epoch": 2.73, + "learning_rate": 4.574387689373767e-06, + "loss": 7.4901, + "step": 716500 + }, + { + "epoch": 2.73, + "learning_rate": 4.542688029701313e-06, + "loss": 7.4942, + "step": 717000 + }, + { + "epoch": 2.73, + "learning_rate": 4.51098837002886e-06, + "loss": 7.4943, + "step": 717500 + }, + { + "epoch": 2.73, + "learning_rate": 4.479288710356406e-06, + "loss": 7.4813, + "step": 718000 + }, + { + "epoch": 2.73, + "learning_rate": 4.447589050683952e-06, + "loss": 7.4915, + "step": 718500 + }, + { + "epoch": 2.74, + "learning_rate": 4.415889391011498e-06, + "loss": 7.4744, + "step": 719000 + }, + { + "epoch": 2.74, + "learning_rate": 4.384189731339045e-06, + "loss": 7.4867, + "step": 719500 + }, + { + "epoch": 2.74, + "learning_rate": 4.352490071666591e-06, + "loss": 7.4755, + "step": 720000 + }, + { + "epoch": 2.74, + "learning_rate": 4.3207904119941366e-06, + "loss": 7.4858, + "step": 720500 + }, + { + "epoch": 2.74, + "learning_rate": 4.289090752321683e-06, + "loss": 7.4846, + "step": 721000 + }, + { + "epoch": 2.74, + "learning_rate": 4.257391092649229e-06, + "loss": 7.4888, + "step": 721500 + }, + { + "epoch": 2.75, + "learning_rate": 4.225691432976776e-06, + "loss": 7.4856, + "step": 722000 + }, + { + "epoch": 2.75, + "learning_rate": 4.193991773304322e-06, + "loss": 7.5096, + "step": 722500 + }, + { + "epoch": 2.75, + "learning_rate": 4.162292113631868e-06, + "loss": 7.4782, + "step": 723000 + }, + { + "epoch": 2.75, + "learning_rate": 4.130592453959414e-06, + "loss": 7.4777, + "step": 723500 + }, + { + "epoch": 2.75, + "learning_rate": 4.09889279428696e-06, + "loss": 7.4838, + "step": 724000 + }, + { + "epoch": 2.76, + "learning_rate": 4.067193134614507e-06, + "loss": 7.4883, + "step": 724500 + }, + { + "epoch": 2.76, + "learning_rate": 4.035493474942053e-06, + "loss": 7.4824, + "step": 725000 + }, + { + "epoch": 2.76, + "learning_rate": 4.003793815269599e-06, + "loss": 7.4915, + "step": 725500 + }, + { + "epoch": 2.76, + "learning_rate": 3.972094155597145e-06, + "loss": 7.4854, + "step": 726000 + }, + { + "epoch": 2.76, + "learning_rate": 3.940394495924692e-06, + "loss": 7.4829, + "step": 726500 + }, + { + "epoch": 2.77, + "learning_rate": 3.908694836252238e-06, + "loss": 7.4947, + "step": 727000 + }, + { + "epoch": 2.77, + "learning_rate": 3.8769951765797845e-06, + "loss": 7.49, + "step": 727500 + }, + { + "epoch": 2.77, + "learning_rate": 3.845295516907331e-06, + "loss": 7.4696, + "step": 728000 + }, + { + "epoch": 2.77, + "learning_rate": 3.813595857234877e-06, + "loss": 7.4842, + "step": 728500 + }, + { + "epoch": 2.77, + "learning_rate": 3.7818961975624233e-06, + "loss": 7.5, + "step": 729000 + }, + { + "epoch": 2.77, + "learning_rate": 3.7501965378899696e-06, + "loss": 7.4932, + "step": 729500 + }, + { + "epoch": 2.78, + "learning_rate": 3.718496878217516e-06, + "loss": 7.4867, + "step": 730000 + }, + { + "epoch": 2.78, + "learning_rate": 3.686797218545062e-06, + "loss": 7.4942, + "step": 730500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6550975588726085e-06, + "loss": 7.4817, + "step": 731000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6233978992001543e-06, + "loss": 7.4858, + "step": 731500 + }, + { + "epoch": 2.78, + "learning_rate": 3.5916982395277006e-06, + "loss": 7.484, + "step": 732000 + }, + { + "epoch": 2.79, + "learning_rate": 3.559998579855247e-06, + "loss": 7.4813, + "step": 732500 + }, + { + "epoch": 2.79, + "learning_rate": 3.528298920182793e-06, + "loss": 7.4867, + "step": 733000 + }, + { + "epoch": 2.79, + "learning_rate": 3.4965992605103394e-06, + "loss": 7.5001, + "step": 733500 + }, + { + "epoch": 2.79, + "learning_rate": 3.4648996008378857e-06, + "loss": 7.4765, + "step": 734000 + }, + { + "epoch": 2.79, + "learning_rate": 3.433199941165432e-06, + "loss": 7.4821, + "step": 734500 + }, + { + "epoch": 2.8, + "learning_rate": 3.401500281492978e-06, + "loss": 7.4928, + "step": 735000 + }, + { + "epoch": 2.8, + "learning_rate": 3.369800621820524e-06, + "loss": 7.4892, + "step": 735500 + }, + { + "epoch": 2.8, + "learning_rate": 3.3381009621480704e-06, + "loss": 7.492, + "step": 736000 + }, + { + "epoch": 2.8, + "learning_rate": 3.3064013024756167e-06, + "loss": 7.502, + "step": 736500 + }, + { + "epoch": 2.8, + "learning_rate": 3.274701642803163e-06, + "loss": 7.4886, + "step": 737000 + }, + { + "epoch": 2.81, + "learning_rate": 3.2430019831307093e-06, + "loss": 7.478, + "step": 737500 + }, + { + "epoch": 2.81, + "learning_rate": 3.2113023234582555e-06, + "loss": 7.4988, + "step": 738000 + }, + { + "epoch": 2.81, + "learning_rate": 3.1796026637858014e-06, + "loss": 7.5066, + "step": 738500 + }, + { + "epoch": 2.81, + "learning_rate": 3.1479030041133477e-06, + "loss": 7.4705, + "step": 739000 + }, + { + "epoch": 2.81, + "learning_rate": 3.1162033444408944e-06, + "loss": 7.4903, + "step": 739500 + }, + { + "epoch": 2.81, + "learning_rate": 3.0845036847684407e-06, + "loss": 7.4835, + "step": 740000 + }, + { + "epoch": 2.82, + "learning_rate": 3.052804025095987e-06, + "loss": 7.5007, + "step": 740500 + }, + { + "epoch": 2.82, + "learning_rate": 3.0211043654235332e-06, + "loss": 7.4836, + "step": 741000 + }, + { + "epoch": 2.82, + "learning_rate": 2.989404705751079e-06, + "loss": 7.4821, + "step": 741500 + }, + { + "epoch": 2.82, + "learning_rate": 2.9577050460786254e-06, + "loss": 7.4831, + "step": 742000 + }, + { + "epoch": 2.82, + "learning_rate": 2.9260053864061716e-06, + "loss": 7.4771, + "step": 742500 + }, + { + "epoch": 2.83, + "learning_rate": 2.894305726733718e-06, + "loss": 7.4929, + "step": 743000 + }, + { + "epoch": 2.83, + "learning_rate": 2.862606067061264e-06, + "loss": 7.4817, + "step": 743500 + }, + { + "epoch": 2.83, + "learning_rate": 2.8309064073888105e-06, + "loss": 7.4738, + "step": 744000 + }, + { + "epoch": 2.83, + "learning_rate": 2.7992067477163568e-06, + "loss": 7.4834, + "step": 744500 + }, + { + "epoch": 2.83, + "learning_rate": 2.7675070880439026e-06, + "loss": 7.4874, + "step": 745000 + }, + { + "epoch": 2.84, + "learning_rate": 2.735807428371449e-06, + "loss": 7.4922, + "step": 745500 + }, + { + "epoch": 2.84, + "learning_rate": 2.704107768698995e-06, + "loss": 7.4841, + "step": 746000 + }, + { + "epoch": 2.84, + "learning_rate": 2.6724081090265415e-06, + "loss": 7.4811, + "step": 746500 + }, + { + "epoch": 2.84, + "learning_rate": 2.6407084493540877e-06, + "loss": 7.5024, + "step": 747000 + }, + { + "epoch": 2.84, + "learning_rate": 2.6090087896816345e-06, + "loss": 7.4905, + "step": 747500 + }, + { + "epoch": 2.85, + "learning_rate": 2.5773091300091803e-06, + "loss": 7.4966, + "step": 748000 + }, + { + "epoch": 2.85, + "learning_rate": 2.5456094703367266e-06, + "loss": 7.4824, + "step": 748500 + }, + { + "epoch": 2.85, + "learning_rate": 2.513909810664273e-06, + "loss": 7.4926, + "step": 749000 + }, + { + "epoch": 2.85, + "learning_rate": 2.482210150991819e-06, + "loss": 7.5065, + "step": 749500 + }, + { + "epoch": 2.85, + "learning_rate": 2.4505104913193654e-06, + "loss": 7.489, + "step": 750000 + }, + { + "epoch": 2.85, + "learning_rate": 2.4188108316469117e-06, + "loss": 7.4916, + "step": 750500 + }, + { + "epoch": 2.86, + "learning_rate": 2.387111171974458e-06, + "loss": 7.4795, + "step": 751000 + }, + { + "epoch": 2.86, + "learning_rate": 2.355411512302004e-06, + "loss": 7.4899, + "step": 751500 + }, + { + "epoch": 2.86, + "learning_rate": 2.32371185262955e-06, + "loss": 7.4849, + "step": 752000 + }, + { + "epoch": 2.86, + "learning_rate": 2.2920121929570964e-06, + "loss": 7.4987, + "step": 752500 + }, + { + "epoch": 2.86, + "learning_rate": 2.2603125332846427e-06, + "loss": 7.492, + "step": 753000 + }, + { + "epoch": 2.87, + "learning_rate": 2.228612873612189e-06, + "loss": 7.4875, + "step": 753500 + }, + { + "epoch": 2.87, + "learning_rate": 2.1969132139397353e-06, + "loss": 7.4801, + "step": 754000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1652135542672815e-06, + "loss": 7.4815, + "step": 754500 + }, + { + "epoch": 2.87, + "learning_rate": 2.1335138945948274e-06, + "loss": 7.4784, + "step": 755000 + }, + { + "epoch": 2.87, + "learning_rate": 2.101814234922374e-06, + "loss": 7.49, + "step": 755500 + }, + { + "epoch": 2.88, + "learning_rate": 2.0701145752499204e-06, + "loss": 7.4881, + "step": 756000 + }, + { + "epoch": 2.88, + "learning_rate": 2.0384149155774667e-06, + "loss": 7.5105, + "step": 756500 + }, + { + "epoch": 2.88, + "learning_rate": 2.006715255905013e-06, + "loss": 7.474, + "step": 757000 + }, + { + "epoch": 2.88, + "learning_rate": 1.9750155962325592e-06, + "loss": 7.4827, + "step": 757500 + }, + { + "epoch": 2.88, + "learning_rate": 1.943315936560105e-06, + "loss": 7.4983, + "step": 758000 + }, + { + "epoch": 2.89, + "learning_rate": 1.9116162768876514e-06, + "loss": 7.4819, + "step": 758500 + }, + { + "epoch": 2.89, + "learning_rate": 1.8799166172151976e-06, + "loss": 7.4906, + "step": 759000 + }, + { + "epoch": 2.89, + "learning_rate": 1.848216957542744e-06, + "loss": 7.4869, + "step": 759500 + }, + { + "epoch": 2.89, + "learning_rate": 1.8165172978702902e-06, + "loss": 7.5035, + "step": 760000 + }, + { + "epoch": 2.89, + "learning_rate": 1.7848176381978363e-06, + "loss": 7.4905, + "step": 760500 + }, + { + "epoch": 2.89, + "learning_rate": 1.7531179785253826e-06, + "loss": 7.5053, + "step": 761000 + }, + { + "epoch": 2.9, + "learning_rate": 1.7214183188529288e-06, + "loss": 7.4814, + "step": 761500 + }, + { + "epoch": 2.9, + "learning_rate": 1.689718659180475e-06, + "loss": 7.4942, + "step": 762000 + }, + { + "epoch": 2.9, + "learning_rate": 1.6580189995080212e-06, + "loss": 7.4941, + "step": 762500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6263193398355675e-06, + "loss": 7.4806, + "step": 763000 + }, + { + "epoch": 2.9, + "learning_rate": 1.594619680163114e-06, + "loss": 7.488, + "step": 763500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5629200204906602e-06, + "loss": 7.4943, + "step": 764000 + }, + { + "epoch": 2.91, + "learning_rate": 1.5312203608182063e-06, + "loss": 7.4968, + "step": 764500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4995207011457526e-06, + "loss": 7.4926, + "step": 765000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4678210414732989e-06, + "loss": 7.4796, + "step": 765500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4361213818008451e-06, + "loss": 7.4879, + "step": 766000 + }, + { + "epoch": 2.92, + "learning_rate": 1.4044217221283914e-06, + "loss": 7.4766, + "step": 766500 + }, + { + "epoch": 2.92, + "learning_rate": 1.3727220624559375e-06, + "loss": 7.4799, + "step": 767000 + }, + { + "epoch": 2.92, + "learning_rate": 1.3410224027834838e-06, + "loss": 7.4736, + "step": 767500 + }, + { + "epoch": 2.92, + "learning_rate": 1.30932274311103e-06, + "loss": 7.4757, + "step": 768000 + }, + { + "epoch": 2.92, + "learning_rate": 1.2776230834385761e-06, + "loss": 7.4907, + "step": 768500 + }, + { + "epoch": 2.93, + "learning_rate": 1.2459234237661224e-06, + "loss": 7.4913, + "step": 769000 + }, + { + "epoch": 2.93, + "learning_rate": 1.214223764093669e-06, + "loss": 7.4808, + "step": 769500 + }, + { + "epoch": 2.93, + "learning_rate": 1.182524104421215e-06, + "loss": 7.4841, + "step": 770000 + }, + { + "epoch": 2.93, + "learning_rate": 1.1508244447487612e-06, + "loss": 7.4796, + "step": 770500 + }, + { + "epoch": 2.93, + "learning_rate": 1.1191247850763075e-06, + "loss": 7.4991, + "step": 771000 + }, + { + "epoch": 2.93, + "learning_rate": 1.0874251254038536e-06, + "loss": 7.489, + "step": 771500 + }, + { + "epoch": 2.94, + "learning_rate": 1.0557254657313999e-06, + "loss": 7.4808, + "step": 772000 + }, + { + "epoch": 2.94, + "learning_rate": 1.0240258060589462e-06, + "loss": 7.4912, + "step": 772500 + }, + { + "epoch": 2.94, + "learning_rate": 9.923261463864924e-07, + "loss": 7.4826, + "step": 773000 + }, + { + "epoch": 2.94, + "learning_rate": 9.606264867140387e-07, + "loss": 7.4879, + "step": 773500 + }, + { + "epoch": 2.94, + "learning_rate": 9.28926827041585e-07, + "loss": 7.4882, + "step": 774000 + }, + { + "epoch": 2.95, + "learning_rate": 8.972271673691312e-07, + "loss": 7.5192, + "step": 774500 + }, + { + "epoch": 2.95, + "learning_rate": 8.655275076966775e-07, + "loss": 7.4925, + "step": 775000 + }, + { + "epoch": 2.95, + "learning_rate": 8.338278480242236e-07, + "loss": 7.4822, + "step": 775500 + }, + { + "epoch": 2.95, + "learning_rate": 8.021281883517698e-07, + "loss": 7.4826, + "step": 776000 + }, + { + "epoch": 2.95, + "learning_rate": 7.704285286793162e-07, + "loss": 7.5019, + "step": 776500 + }, + { + "epoch": 2.96, + "learning_rate": 7.387288690068624e-07, + "loss": 7.4893, + "step": 777000 + }, + { + "epoch": 2.96, + "learning_rate": 7.070292093344086e-07, + "loss": 7.4881, + "step": 777500 + }, + { + "epoch": 2.96, + "learning_rate": 6.753295496619548e-07, + "loss": 7.4814, + "step": 778000 + }, + { + "epoch": 2.96, + "learning_rate": 6.436298899895011e-07, + "loss": 7.4952, + "step": 778500 + }, + { + "epoch": 2.96, + "learning_rate": 6.119302303170474e-07, + "loss": 7.5016, + "step": 779000 + }, + { + "epoch": 2.97, + "learning_rate": 5.802305706445936e-07, + "loss": 7.4836, + "step": 779500 + }, + { + "epoch": 2.97, + "learning_rate": 5.485309109721397e-07, + "loss": 7.5027, + "step": 780000 + }, + { + "epoch": 2.97, + "learning_rate": 5.168312512996861e-07, + "loss": 7.5021, + "step": 780500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851315916272323e-07, + "loss": 7.4949, + "step": 781000 + }, + { + "epoch": 2.97, + "learning_rate": 4.534319319547785e-07, + "loss": 7.4767, + "step": 781500 + }, + { + "epoch": 2.97, + "learning_rate": 4.2173227228232475e-07, + "loss": 7.4891, + "step": 782000 + }, + { + "epoch": 2.98, + "learning_rate": 3.9003261260987103e-07, + "loss": 7.4797, + "step": 782500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5833295293741726e-07, + "loss": 7.502, + "step": 783000 + }, + { + "epoch": 2.98, + "learning_rate": 3.266332932649635e-07, + "loss": 7.4929, + "step": 783500 + }, + { + "epoch": 2.98, + "learning_rate": 2.9493363359250977e-07, + "loss": 7.4897, + "step": 784000 + }, + { + "epoch": 2.98, + "learning_rate": 2.63233973920056e-07, + "loss": 7.4833, + "step": 784500 + }, + { + "epoch": 2.99, + "learning_rate": 2.3153431424760225e-07, + "loss": 7.4747, + "step": 785000 + }, + { + "epoch": 2.99, + "learning_rate": 1.9983465457514848e-07, + "loss": 7.485, + "step": 785500 + }, + { + "epoch": 2.99, + "learning_rate": 1.6813499490269473e-07, + "loss": 7.4975, + "step": 786000 + }, + { + "epoch": 2.99, + "learning_rate": 1.36435335230241e-07, + "loss": 7.4765, + "step": 786500 + }, + { + "epoch": 2.99, + "learning_rate": 1.0473567555778721e-07, + "loss": 7.482, + "step": 787000 + }, + { + "epoch": 3.0, + "learning_rate": 7.303601588533346e-08, + "loss": 7.4931, + "step": 787500 + }, + { + "epoch": 3.0, + "learning_rate": 4.13363562128797e-08, + "loss": 7.4786, + "step": 788000 + }, + { + "epoch": 3.0, + "learning_rate": 9.636696540425943e-09, + "loss": 7.4738, + "step": 788500 + }, + { + "epoch": 3.0, + "step": 788652, + "total_flos": 1.3227171767511228e+19, + "train_loss": 7.491424576953926, + "train_runtime": 331955.1531, + "train_samples_per_second": 19.006, + "train_steps_per_second": 2.376 + } + ], + "max_steps": 788652, + "num_train_epochs": 3, + "total_flos": 1.3227171767511228e+19, + "trial_name": null, + "trial_params": null +}