{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 3620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 15.835403726708075, "eval_loss": 4.191929817199707, "eval_rouge1": 14.9919, "eval_rouge2": 9.1411, "eval_rougeL": 12.8299, "eval_rougeLsum": 12.9602, "eval_runtime": 191.6694, "eval_samples_per_second": 1.68, "eval_steps_per_second": 0.214, "step": 362 }, { "epoch": 1.38, "learning_rate": 3e-05, "loss": 19.4803, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 10.891304347826088, "eval_loss": 0.8027109503746033, "eval_rouge1": 31.2365, "eval_rouge2": 23.4015, "eval_rougeL": 27.8979, "eval_rougeLsum": 28.0521, "eval_runtime": 167.1507, "eval_samples_per_second": 1.926, "eval_steps_per_second": 0.245, "step": 724 }, { "epoch": 2.76, "learning_rate": 2.5192307692307694e-05, "loss": 0.87, "step": 1000 }, { "epoch": 3.0, "eval_gen_len": 10.48136645962733, "eval_loss": 0.7601152658462524, "eval_rouge1": 32.7524, "eval_rouge2": 24.7831, "eval_rougeL": 29.4005, "eval_rougeLsum": 29.6329, "eval_runtime": 163.4166, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.251, "step": 1086 }, { "epoch": 4.0, "eval_gen_len": 10.73913043478261, "eval_loss": 0.7358614802360535, "eval_rouge1": 32.4199, "eval_rouge2": 24.3103, "eval_rougeL": 29.045, "eval_rougeLsum": 29.3107, "eval_runtime": 165.7836, "eval_samples_per_second": 1.942, "eval_steps_per_second": 0.247, "step": 1448 }, { "epoch": 4.14, "learning_rate": 2.0384615384615387e-05, "loss": 0.7969, "step": 1500 }, { "epoch": 5.0, "eval_gen_len": 10.677018633540373, "eval_loss": 0.7158524990081787, "eval_rouge1": 33.081, "eval_rouge2": 24.9552, "eval_rougeL": 29.7936, "eval_rougeLsum": 30.0534, "eval_runtime": 157.6094, "eval_samples_per_second": 2.043, "eval_steps_per_second": 0.26, "step": 1810 }, { "epoch": 5.52, "learning_rate": 1.557692307692308e-05, "loss": 0.7607, "step": 2000 }, { "epoch": 6.0, "eval_gen_len": 10.881987577639752, "eval_loss": 0.7028866410255432, "eval_rouge1": 32.6081, "eval_rouge2": 24.4439, "eval_rougeL": 29.3121, "eval_rougeLsum": 29.5849, "eval_runtime": 172.6927, "eval_samples_per_second": 1.865, "eval_steps_per_second": 0.237, "step": 2172 }, { "epoch": 6.91, "learning_rate": 1.076923076923077e-05, "loss": 0.7482, "step": 2500 }, { "epoch": 7.0, "eval_gen_len": 10.881987577639752, "eval_loss": 0.6927917003631592, "eval_rouge1": 32.7673, "eval_rouge2": 24.6101, "eval_rougeL": 29.5065, "eval_rougeLsum": 29.7823, "eval_runtime": 166.0105, "eval_samples_per_second": 1.94, "eval_steps_per_second": 0.247, "step": 2534 }, { "epoch": 8.0, "eval_gen_len": 11.0, "eval_loss": 0.6865409016609192, "eval_rouge1": 32.648, "eval_rouge2": 24.3905, "eval_rougeL": 29.4374, "eval_rougeLsum": 29.7019, "eval_runtime": 167.4583, "eval_samples_per_second": 1.923, "eval_steps_per_second": 0.245, "step": 2896 }, { "epoch": 8.29, "learning_rate": 5.9615384615384615e-06, "loss": 0.729, "step": 3000 }, { "epoch": 9.0, "eval_gen_len": 11.003105590062113, "eval_loss": 0.6830553412437439, "eval_rouge1": 32.7058, "eval_rouge2": 24.4816, "eval_rougeL": 29.4377, "eval_rougeLsum": 29.6886, "eval_runtime": 167.3316, "eval_samples_per_second": 1.924, "eval_steps_per_second": 0.245, "step": 3258 }, { "epoch": 9.67, "learning_rate": 1.153846153846154e-06, "loss": 0.7218, "step": 3500 }, { "epoch": 10.0, "eval_gen_len": 10.937888198757763, "eval_loss": 0.6820552349090576, "eval_rouge1": 32.9118, "eval_rouge2": 24.7369, "eval_rougeL": 29.6106, "eval_rougeLsum": 29.8854, "eval_runtime": 166.8612, "eval_samples_per_second": 1.93, "eval_steps_per_second": 0.246, "step": 3620 } ], "logging_steps": 500, "max_steps": 3620, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.047953789026304e+16, "trial_name": null, "trial_params": null }