{ "best_metric": 29.1144, "best_model_checkpoint": "./ko-en_mbartLarge_exp20p_linear_alpha/checkpoint-60000", "epoch": 6.9613644274277755, "eval_steps": 4000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 5.4920234365935726e-05, "loss": 2.3766, "step": 500 }, { "epoch": 0.12, "learning_rate": 5.484046873187145e-05, "loss": 1.7994, "step": 1000 }, { "epoch": 0.17, "learning_rate": 5.476070309780717e-05, "loss": 1.6333, "step": 1500 }, { "epoch": 0.23, "learning_rate": 5.4680937463742896e-05, "loss": 1.554, "step": 2000 }, { "epoch": 0.29, "learning_rate": 5.460117182967862e-05, "loss": 1.5092, "step": 2500 }, { "epoch": 0.35, "learning_rate": 5.4521406195614344e-05, "loss": 1.4709, "step": 3000 }, { "epoch": 0.41, "learning_rate": 5.444164056155007e-05, "loss": 1.4354, "step": 3500 }, { "epoch": 0.46, "learning_rate": 5.4361874927485784e-05, "loss": 1.404, "step": 4000 }, { "epoch": 0.46, "eval_bleu": 22.5375, "eval_gen_len": 18.6852, "eval_loss": 1.373841404914856, "eval_runtime": 1144.9051, "eval_samples_per_second": 15.055, "eval_steps_per_second": 0.942, "step": 4000 }, { "epoch": 0.52, "learning_rate": 5.4282109293421515e-05, "loss": 1.3753, "step": 4500 }, { "epoch": 0.58, "learning_rate": 5.420234365935724e-05, "loss": 1.3399, "step": 5000 }, { "epoch": 0.64, "learning_rate": 5.412257802529296e-05, "loss": 1.3272, "step": 5500 }, { "epoch": 0.7, "learning_rate": 5.404281239122868e-05, "loss": 1.3226, "step": 6000 }, { "epoch": 0.75, "learning_rate": 5.39630467571644e-05, "loss": 1.2978, "step": 6500 }, { "epoch": 0.81, "learning_rate": 5.388328112310013e-05, "loss": 1.269, "step": 7000 }, { "epoch": 0.87, "learning_rate": 5.3803515489035856e-05, "loss": 1.2787, "step": 7500 }, { "epoch": 0.93, "learning_rate": 5.372374985497158e-05, "loss": 1.2629, "step": 8000 }, { "epoch": 0.93, "eval_bleu": 25.3741, "eval_gen_len": 18.7797, "eval_loss": 1.245756983757019, "eval_runtime": 1156.1784, "eval_samples_per_second": 14.909, "eval_steps_per_second": 0.932, "step": 8000 }, { "epoch": 0.99, "learning_rate": 5.36439842209073e-05, "loss": 1.2572, "step": 8500 }, { "epoch": 1.04, "learning_rate": 5.356421858684302e-05, "loss": 1.2035, "step": 9000 }, { "epoch": 1.1, "learning_rate": 5.348445295277875e-05, "loss": 1.1945, "step": 9500 }, { "epoch": 1.16, "learning_rate": 5.3404687318714474e-05, "loss": 1.179, "step": 10000 }, { "epoch": 1.22, "learning_rate": 5.332492168465019e-05, "loss": 1.2032, "step": 10500 }, { "epoch": 1.28, "learning_rate": 5.3245156050585915e-05, "loss": 1.192, "step": 11000 }, { "epoch": 1.33, "learning_rate": 5.316539041652164e-05, "loss": 1.1795, "step": 11500 }, { "epoch": 1.39, "learning_rate": 5.308562478245736e-05, "loss": 1.1951, "step": 12000 }, { "epoch": 1.39, "eval_bleu": 26.1281, "eval_gen_len": 18.6597, "eval_loss": 1.2066867351531982, "eval_runtime": 1158.2083, "eval_samples_per_second": 14.882, "eval_steps_per_second": 0.931, "step": 12000 }, { "epoch": 1.45, "learning_rate": 5.300585914839309e-05, "loss": 1.1704, "step": 12500 }, { "epoch": 1.51, "learning_rate": 5.292609351432881e-05, "loss": 1.1547, "step": 13000 }, { "epoch": 1.57, "learning_rate": 5.284632788026453e-05, "loss": 1.1419, "step": 13500 }, { "epoch": 1.62, "learning_rate": 5.276656224620026e-05, "loss": 1.1396, "step": 14000 }, { "epoch": 1.68, "learning_rate": 5.268679661213598e-05, "loss": 1.1549, "step": 14500 }, { "epoch": 1.74, "learning_rate": 5.2607030978071704e-05, "loss": 1.1255, "step": 15000 }, { "epoch": 1.8, "learning_rate": 5.252726534400743e-05, "loss": 1.1395, "step": 15500 }, { "epoch": 1.86, "learning_rate": 5.244749970994315e-05, "loss": 1.1317, "step": 16000 }, { "epoch": 1.86, "eval_bleu": 26.5384, "eval_gen_len": 19.2055, "eval_loss": 1.1767752170562744, "eval_runtime": 1178.3467, "eval_samples_per_second": 14.628, "eval_steps_per_second": 0.915, "step": 16000 }, { "epoch": 1.91, "learning_rate": 5.2367734075878875e-05, "loss": 1.1557, "step": 16500 }, { "epoch": 1.97, "learning_rate": 5.22879684418146e-05, "loss": 1.1478, "step": 17000 }, { "epoch": 2.03, "learning_rate": 5.2208202807750315e-05, "loss": 1.0771, "step": 17500 }, { "epoch": 2.09, "learning_rate": 5.2128437173686046e-05, "loss": 1.0135, "step": 18000 }, { "epoch": 2.15, "learning_rate": 5.204867153962177e-05, "loss": 1.0028, "step": 18500 }, { "epoch": 2.2, "learning_rate": 5.196890590555749e-05, "loss": 0.9894, "step": 19000 }, { "epoch": 2.26, "learning_rate": 5.1889140271493216e-05, "loss": 1.0178, "step": 19500 }, { "epoch": 2.32, "learning_rate": 5.180937463742893e-05, "loss": 0.9906, "step": 20000 }, { "epoch": 2.32, "eval_bleu": 28.2459, "eval_gen_len": 18.7269, "eval_loss": 1.1362603902816772, "eval_runtime": 1157.7765, "eval_samples_per_second": 14.888, "eval_steps_per_second": 0.931, "step": 20000 }, { "epoch": 2.38, "learning_rate": 5.1729609003364664e-05, "loss": 1.0083, "step": 20500 }, { "epoch": 2.44, "learning_rate": 5.164984336930039e-05, "loss": 0.9965, "step": 21000 }, { "epoch": 2.49, "learning_rate": 5.157007773523611e-05, "loss": 0.9992, "step": 21500 }, { "epoch": 2.55, "learning_rate": 5.149031210117183e-05, "loss": 1.0042, "step": 22000 }, { "epoch": 2.61, "learning_rate": 5.141054646710755e-05, "loss": 0.9998, "step": 22500 }, { "epoch": 2.67, "learning_rate": 5.1330780833043275e-05, "loss": 1.0076, "step": 23000 }, { "epoch": 2.73, "learning_rate": 5.1251015198979005e-05, "loss": 1.0047, "step": 23500 }, { "epoch": 2.78, "learning_rate": 5.117124956491473e-05, "loss": 0.9894, "step": 24000 }, { "epoch": 2.78, "eval_bleu": 28.5124, "eval_gen_len": 18.6882, "eval_loss": 1.1238752603530884, "eval_runtime": 1152.8912, "eval_samples_per_second": 14.951, "eval_steps_per_second": 0.935, "step": 24000 }, { "epoch": 2.84, "learning_rate": 5.1091483930850446e-05, "loss": 1.0115, "step": 24500 }, { "epoch": 2.9, "learning_rate": 5.101171829678617e-05, "loss": 0.9958, "step": 25000 }, { "epoch": 2.96, "learning_rate": 5.093195266272189e-05, "loss": 1.0044, "step": 25500 }, { "epoch": 3.02, "learning_rate": 5.0852187028657624e-05, "loss": 0.9668, "step": 26000 }, { "epoch": 3.07, "learning_rate": 5.077242139459334e-05, "loss": 0.8751, "step": 26500 }, { "epoch": 3.13, "learning_rate": 5.0692655760529064e-05, "loss": 0.8916, "step": 27000 }, { "epoch": 3.19, "learning_rate": 5.061289012646479e-05, "loss": 0.8861, "step": 27500 }, { "epoch": 3.25, "learning_rate": 5.053312449240051e-05, "loss": 0.8965, "step": 28000 }, { "epoch": 3.25, "eval_bleu": 28.5335, "eval_gen_len": 18.4917, "eval_loss": 1.1277672052383423, "eval_runtime": 1142.9887, "eval_samples_per_second": 15.081, "eval_steps_per_second": 0.943, "step": 28000 }, { "epoch": 3.31, "learning_rate": 5.045335885833624e-05, "loss": 0.8898, "step": 28500 }, { "epoch": 3.36, "learning_rate": 5.037359322427196e-05, "loss": 0.8982, "step": 29000 }, { "epoch": 3.42, "learning_rate": 5.029382759020768e-05, "loss": 0.896, "step": 29500 }, { "epoch": 3.48, "learning_rate": 5.0214061956143406e-05, "loss": 0.8889, "step": 30000 }, { "epoch": 3.54, "learning_rate": 5.013429632207913e-05, "loss": 0.9056, "step": 30500 }, { "epoch": 3.6, "learning_rate": 5.005453068801485e-05, "loss": 0.8867, "step": 31000 }, { "epoch": 3.65, "learning_rate": 4.997476505395058e-05, "loss": 0.911, "step": 31500 }, { "epoch": 3.71, "learning_rate": 4.98949994198863e-05, "loss": 0.9138, "step": 32000 }, { "epoch": 3.71, "eval_bleu": 28.8189, "eval_gen_len": 18.7873, "eval_loss": 1.1216284036636353, "eval_runtime": 1156.9444, "eval_samples_per_second": 14.899, "eval_steps_per_second": 0.932, "step": 32000 }, { "epoch": 3.77, "learning_rate": 4.9815233785822024e-05, "loss": 0.9023, "step": 32500 }, { "epoch": 3.83, "learning_rate": 4.973546815175775e-05, "loss": 0.9127, "step": 33000 }, { "epoch": 3.89, "learning_rate": 4.9655702517693464e-05, "loss": 0.9168, "step": 33500 }, { "epoch": 3.94, "learning_rate": 4.9575936883629195e-05, "loss": 0.8978, "step": 34000 }, { "epoch": 4.0, "learning_rate": 4.949617124956492e-05, "loss": 0.9229, "step": 34500 }, { "epoch": 4.06, "learning_rate": 4.941640561550064e-05, "loss": 0.828, "step": 35000 }, { "epoch": 4.12, "learning_rate": 4.9336639981436366e-05, "loss": 0.8326, "step": 35500 }, { "epoch": 4.18, "learning_rate": 4.925687434737208e-05, "loss": 0.8272, "step": 36000 }, { "epoch": 4.18, "eval_bleu": 28.332, "eval_gen_len": 18.6516, "eval_loss": 1.1468099355697632, "eval_runtime": 1152.7736, "eval_samples_per_second": 14.953, "eval_steps_per_second": 0.935, "step": 36000 }, { "epoch": 4.23, "learning_rate": 4.9177108713307806e-05, "loss": 0.8259, "step": 36500 }, { "epoch": 4.29, "learning_rate": 4.9097343079243536e-05, "loss": 0.8243, "step": 37000 }, { "epoch": 4.35, "learning_rate": 4.901757744517926e-05, "loss": 0.8664, "step": 37500 }, { "epoch": 4.41, "learning_rate": 4.893781181111498e-05, "loss": 0.8893, "step": 38000 }, { "epoch": 4.47, "learning_rate": 4.88580461770507e-05, "loss": 0.8958, "step": 38500 }, { "epoch": 4.52, "learning_rate": 4.8778280542986424e-05, "loss": 0.8924, "step": 39000 }, { "epoch": 4.58, "learning_rate": 4.8698514908922155e-05, "loss": 0.885, "step": 39500 }, { "epoch": 4.64, "learning_rate": 4.861874927485788e-05, "loss": 0.8753, "step": 40000 }, { "epoch": 4.64, "eval_bleu": 28.2695, "eval_gen_len": 18.4919, "eval_loss": 1.1344704627990723, "eval_runtime": 1148.1227, "eval_samples_per_second": 15.013, "eval_steps_per_second": 0.939, "step": 40000 }, { "epoch": 4.7, "learning_rate": 4.8538983640793595e-05, "loss": 0.8749, "step": 40500 }, { "epoch": 4.76, "learning_rate": 4.845921800672932e-05, "loss": 0.8621, "step": 41000 }, { "epoch": 4.81, "learning_rate": 4.837945237266504e-05, "loss": 0.8616, "step": 41500 }, { "epoch": 4.87, "learning_rate": 4.8299686738600766e-05, "loss": 0.8551, "step": 42000 }, { "epoch": 4.93, "learning_rate": 4.821992110453649e-05, "loss": 0.8504, "step": 42500 }, { "epoch": 4.99, "learning_rate": 4.814015547047221e-05, "loss": 0.8459, "step": 43000 }, { "epoch": 5.05, "learning_rate": 4.806038983640794e-05, "loss": 0.7255, "step": 43500 }, { "epoch": 5.11, "learning_rate": 4.798062420234366e-05, "loss": 0.6855, "step": 44000 }, { "epoch": 5.11, "eval_bleu": 28.7913, "eval_gen_len": 18.7596, "eval_loss": 1.154221773147583, "eval_runtime": 1168.8964, "eval_samples_per_second": 14.746, "eval_steps_per_second": 0.922, "step": 44000 }, { "epoch": 5.16, "learning_rate": 4.7900858568279384e-05, "loss": 0.7002, "step": 44500 }, { "epoch": 5.22, "learning_rate": 4.782109293421511e-05, "loss": 0.6982, "step": 45000 }, { "epoch": 5.28, "learning_rate": 4.774132730015083e-05, "loss": 0.6976, "step": 45500 }, { "epoch": 5.34, "learning_rate": 4.7661561666086555e-05, "loss": 0.7028, "step": 46000 }, { "epoch": 5.4, "learning_rate": 4.758179603202228e-05, "loss": 0.7138, "step": 46500 }, { "epoch": 5.45, "learning_rate": 4.7502030397958e-05, "loss": 0.7121, "step": 47000 }, { "epoch": 5.51, "learning_rate": 4.7422264763893726e-05, "loss": 0.7043, "step": 47500 }, { "epoch": 5.57, "learning_rate": 4.734249912982945e-05, "loss": 0.7088, "step": 48000 }, { "epoch": 5.57, "eval_bleu": 29.0865, "eval_gen_len": 18.6626, "eval_loss": 1.153067708015442, "eval_runtime": 1149.2413, "eval_samples_per_second": 14.999, "eval_steps_per_second": 0.938, "step": 48000 }, { "epoch": 5.63, "learning_rate": 4.726273349576517e-05, "loss": 0.7116, "step": 48500 }, { "epoch": 5.69, "learning_rate": 4.71829678617009e-05, "loss": 0.7292, "step": 49000 }, { "epoch": 5.74, "learning_rate": 4.710320222763662e-05, "loss": 0.7289, "step": 49500 }, { "epoch": 5.8, "learning_rate": 4.702343659357234e-05, "loss": 0.7514, "step": 50000 }, { "epoch": 5.86, "learning_rate": 4.694367095950807e-05, "loss": 0.7545, "step": 50500 }, { "epoch": 5.92, "learning_rate": 4.686390532544379e-05, "loss": 0.7362, "step": 51000 }, { "epoch": 5.98, "learning_rate": 4.6784139691379515e-05, "loss": 0.7413, "step": 51500 }, { "epoch": 6.03, "learning_rate": 4.670437405731523e-05, "loss": 0.6738, "step": 52000 }, { "epoch": 6.03, "eval_bleu": 28.0235, "eval_gen_len": 18.4243, "eval_loss": 1.190636396408081, "eval_runtime": 1139.2873, "eval_samples_per_second": 15.13, "eval_steps_per_second": 0.946, "step": 52000 }, { "epoch": 6.09, "learning_rate": 4.6624608423250955e-05, "loss": 0.6467, "step": 52500 }, { "epoch": 6.15, "learning_rate": 4.6544842789186686e-05, "loss": 0.6508, "step": 53000 }, { "epoch": 6.21, "learning_rate": 4.646507715512241e-05, "loss": 0.6742, "step": 53500 }, { "epoch": 6.27, "learning_rate": 4.638531152105813e-05, "loss": 0.6609, "step": 54000 }, { "epoch": 6.32, "learning_rate": 4.630554588699385e-05, "loss": 0.6464, "step": 54500 }, { "epoch": 6.38, "learning_rate": 4.622578025292957e-05, "loss": 0.6362, "step": 55000 }, { "epoch": 6.44, "learning_rate": 4.61460146188653e-05, "loss": 0.6552, "step": 55500 }, { "epoch": 6.5, "learning_rate": 4.606624898480103e-05, "loss": 0.6763, "step": 56000 }, { "epoch": 6.5, "eval_bleu": 28.1501, "eval_gen_len": 18.6932, "eval_loss": 1.1940782070159912, "eval_runtime": 1154.4139, "eval_samples_per_second": 14.931, "eval_steps_per_second": 0.934, "step": 56000 }, { "epoch": 6.56, "learning_rate": 4.5986483350736744e-05, "loss": 0.6775, "step": 56500 }, { "epoch": 6.61, "learning_rate": 4.590671771667247e-05, "loss": 0.6765, "step": 57000 }, { "epoch": 6.67, "learning_rate": 4.582695208260819e-05, "loss": 0.681, "step": 57500 }, { "epoch": 6.73, "learning_rate": 4.5747186448543915e-05, "loss": 0.6747, "step": 58000 }, { "epoch": 6.79, "learning_rate": 4.5667420814479645e-05, "loss": 0.6612, "step": 58500 }, { "epoch": 6.85, "learning_rate": 4.558765518041536e-05, "loss": 0.6662, "step": 59000 }, { "epoch": 6.9, "learning_rate": 4.5507889546351086e-05, "loss": 0.6653, "step": 59500 }, { "epoch": 6.96, "learning_rate": 4.542812391228681e-05, "loss": 0.6594, "step": 60000 }, { "epoch": 6.96, "eval_bleu": 29.1144, "eval_gen_len": 18.5459, "eval_loss": 1.1682050228118896, "eval_runtime": 1139.7821, "eval_samples_per_second": 15.123, "eval_steps_per_second": 0.946, "step": 60000 } ], "logging_steps": 500, "max_steps": 344760, "num_train_epochs": 40, "save_steps": 4000, "total_flos": 2.0805485991789855e+18, "trial_name": null, "trial_params": null }