{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3970, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0083, "eval_gen_len": 19.0, "eval_loss": 1.1680346727371216, "eval_runtime": 38.281, "eval_samples_per_second": 20.689, "eval_steps_per_second": 1.306, "step": 397 }, { "epoch": 1.26, "learning_rate": 4.370277078085643e-05, "loss": 1.7159, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.0165, "eval_gen_len": 19.0, "eval_loss": 1.0287196636199951, "eval_runtime": 38.488, "eval_samples_per_second": 20.578, "eval_steps_per_second": 1.299, "step": 794 }, { "epoch": 2.52, "learning_rate": 3.7405541561712845e-05, "loss": 1.1916, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.0177, "eval_gen_len": 19.0, "eval_loss": 0.9632178544998169, "eval_runtime": 38.4087, "eval_samples_per_second": 20.62, "eval_steps_per_second": 1.302, "step": 1191 }, { "epoch": 3.78, "learning_rate": 3.1108312342569276e-05, "loss": 1.077, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.0307, "eval_gen_len": 19.0, "eval_loss": 0.9211758971214294, "eval_runtime": 38.4332, "eval_samples_per_second": 20.607, "eval_steps_per_second": 1.301, "step": 1588 }, { "epoch": 5.0, "eval_bleu": 0.0278, "eval_gen_len": 19.0, "eval_loss": 0.8920512199401855, "eval_runtime": 39.0152, "eval_samples_per_second": 20.3, "eval_steps_per_second": 1.282, "step": 1985 }, { "epoch": 5.04, "learning_rate": 2.4811083123425694e-05, "loss": 1.0159, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.0465, "eval_gen_len": 19.0, "eval_loss": 0.869423508644104, "eval_runtime": 38.5496, "eval_samples_per_second": 20.545, "eval_steps_per_second": 1.297, "step": 2382 }, { "epoch": 6.3, "learning_rate": 1.8513853904282116e-05, "loss": 0.9723, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.0489, "eval_gen_len": 19.0, "eval_loss": 0.8542945981025696, "eval_runtime": 38.3887, "eval_samples_per_second": 20.631, "eval_steps_per_second": 1.302, "step": 2779 }, { "epoch": 7.56, "learning_rate": 1.2216624685138539e-05, "loss": 0.9457, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.0451, "eval_gen_len": 19.0, "eval_loss": 0.8452380895614624, "eval_runtime": 38.2635, "eval_samples_per_second": 20.699, "eval_steps_per_second": 1.307, "step": 3176 }, { "epoch": 8.82, "learning_rate": 5.919395465994963e-06, "loss": 0.9276, "step": 3500 }, { "epoch": 9.0, "eval_bleu": 0.0496, "eval_gen_len": 19.0, "eval_loss": 0.8380882143974304, "eval_runtime": 38.7379, "eval_samples_per_second": 20.445, "eval_steps_per_second": 1.291, "step": 3573 }, { "epoch": 10.0, "eval_bleu": 0.048, "eval_gen_len": 19.0, "eval_loss": 0.836793839931488, "eval_runtime": 38.47, "eval_samples_per_second": 20.587, "eval_steps_per_second": 1.3, "step": 3970 }, { "epoch": 10.0, "step": 3970, "total_flos": 2.2534682441342976e+16, "train_loss": 1.0967687035087375, "train_runtime": 2092.8002, "train_samples_per_second": 30.28, "train_steps_per_second": 1.897 } ], "max_steps": 3970, "num_train_epochs": 10, "total_flos": 2.2534682441342976e+16, "trial_name": null, "trial_params": null }