{ "best_metric": 24.761, "best_model_checkpoint": "./output\\checkpoint-1052", "epoch": 20.0, "global_step": 10520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 9.5e-06, "loss": 3.0166, "step": 526 }, { "epoch": 1.0, "eval_gen_len": 15.76, "eval_rouge2": 6.32, "eval_rougeL": 21.606, "step": 526 }, { "epoch": 2.0, "learning_rate": 9e-06, "loss": 2.7368, "step": 1052 }, { "epoch": 2.0, "eval_gen_len": 22.06, "eval_rouge2": 8.84, "eval_rougeL": 24.761, "step": 1052 }, { "epoch": 3.0, "learning_rate": 8.5e-06, "loss": 2.5849, "step": 1578 }, { "epoch": 3.0, "eval_gen_len": 19.17, "eval_rouge2": 7.97, "eval_rougeL": 23.301, "step": 1578 }, { "epoch": 4.0, "learning_rate": 8.000000000000001e-06, "loss": 2.4639, "step": 2104 }, { "epoch": 4.0, "eval_gen_len": 16.53, "eval_rouge2": 6.92, "eval_rougeL": 22.252, "step": 2104 }, { "epoch": 5.0, "learning_rate": 7.500000000000001e-06, "loss": 2.3647, "step": 2630 }, { "epoch": 5.0, "eval_gen_len": 15.61, "eval_rouge2": 6.25, "eval_rougeL": 21.732, "step": 2630 }, { "epoch": 6.0, "learning_rate": 7e-06, "loss": 2.2886, "step": 3156 }, { "epoch": 6.0, "eval_gen_len": 16.78, "eval_rouge2": 7.3, "eval_rougeL": 23.088, "step": 3156 }, { "epoch": 7.0, "learning_rate": 6.5000000000000004e-06, "loss": 2.202, "step": 3682 }, { "epoch": 7.0, "eval_gen_len": 18.84, "eval_rouge2": 6.67, "eval_rougeL": 22.378, "step": 3682 }, { "epoch": 8.0, "learning_rate": 6e-06, "loss": 2.1348, "step": 4208 }, { "epoch": 8.0, "eval_gen_len": 14.52, "eval_rouge2": 6.22, "eval_rougeL": 22.215, "step": 4208 }, { "epoch": 9.0, "learning_rate": 5.500000000000001e-06, "loss": 2.0736, "step": 4734 }, { "epoch": 9.0, "eval_gen_len": 17.94, "eval_rouge2": 6.56, "eval_rougeL": 22.336, "step": 4734 }, { "epoch": 10.0, "learning_rate": 5e-06, "loss": 2.0202, "step": 5260 }, { "epoch": 10.0, "eval_gen_len": 16.91, "eval_rouge2": 6.38, "eval_rougeL": 21.603, "step": 5260 }, { "epoch": 11.0, "learning_rate": 4.5e-06, "loss": 1.9656, "step": 5786 }, { "epoch": 11.0, "eval_gen_len": 17.7, "eval_rouge2": 6.6, "eval_rougeL": 22.292, "step": 5786 }, { "epoch": 12.0, "learning_rate": 4.000000000000001e-06, "loss": 1.9173, "step": 6312 }, { "epoch": 12.0, "eval_gen_len": 17.21, "eval_rouge2": 6.32, "eval_rougeL": 21.212, "step": 6312 }, { "epoch": 13.0, "learning_rate": 3.5e-06, "loss": 1.8784, "step": 6838 }, { "epoch": 13.0, "eval_gen_len": 18.93, "eval_rouge2": 6.95, "eval_rougeL": 22.939, "step": 6838 }, { "epoch": 14.0, "learning_rate": 3e-06, "loss": 1.8346, "step": 7364 }, { "epoch": 14.0, "eval_gen_len": 19.19, "eval_rouge2": 6.42, "eval_rougeL": 21.364, "step": 7364 }, { "epoch": 15.0, "learning_rate": 2.5e-06, "loss": 1.8006, "step": 7890 }, { "epoch": 15.0, "eval_gen_len": 18.08, "eval_rouge2": 5.86, "eval_rougeL": 21.227, "step": 7890 }, { "epoch": 16.0, "learning_rate": 2.0000000000000003e-06, "loss": 1.7784, "step": 8416 }, { "epoch": 16.0, "eval_gen_len": 17.06, "eval_rouge2": 6.49, "eval_rougeL": 21.942, "step": 8416 }, { "epoch": 17.0, "learning_rate": 1.5e-06, "loss": 1.7526, "step": 8942 }, { "epoch": 17.0, "eval_gen_len": 18.66, "eval_rouge2": 7.02, "eval_rougeL": 22.17, "step": 8942 }, { "epoch": 18.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.7409, "step": 9468 }, { "epoch": 18.0, "eval_gen_len": 17.86, "eval_rouge2": 6.65, "eval_rougeL": 21.959, "step": 9468 }, { "epoch": 19.0, "learning_rate": 5.000000000000001e-07, "loss": 1.7238, "step": 9994 }, { "epoch": 19.0, "eval_gen_len": 17.61, "eval_rouge2": 6.23, "eval_rougeL": 21.571, "step": 9994 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 1.7105, "step": 10520 }, { "epoch": 20.0, "eval_gen_len": 17.14, "eval_rouge2": 6.42, "eval_rougeL": 21.664, "step": 10520 } ], "max_steps": 10520, "num_train_epochs": 20, "total_flos": 9621641311027200.0, "trial_name": null, "trial_params": null }