{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 15600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.00029038461538461537, "loss": 0.0701, "step": 500 }, { "epoch": 0.32, "learning_rate": 0.00028076923076923076, "loss": 0.0536, "step": 1000 }, { "epoch": 0.48, "learning_rate": 0.00027115384615384615, "loss": 0.0466, "step": 1500 }, { "epoch": 0.64, "learning_rate": 0.00026153846153846154, "loss": 0.0412, "step": 2000 }, { "epoch": 0.8, "learning_rate": 0.0002519230769230769, "loss": 0.0373, "step": 2500 }, { "epoch": 0.96, "learning_rate": 0.0002423076923076923, "loss": 0.0324, "step": 3000 }, { "epoch": 1.12, "learning_rate": 0.00023269230769230767, "loss": 0.0289, "step": 3500 }, { "epoch": 1.28, "learning_rate": 0.00022307692307692306, "loss": 0.0276, "step": 4000 }, { "epoch": 1.44, "learning_rate": 0.00021346153846153845, "loss": 0.0244, "step": 4500 }, { "epoch": 1.6, "learning_rate": 0.00020384615384615385, "loss": 0.0291, "step": 5000 }, { "epoch": 1.76, "learning_rate": 0.0001942307692307692, "loss": 0.0221, "step": 5500 }, { "epoch": 1.92, "learning_rate": 0.0001846153846153846, "loss": 0.0216, "step": 6000 }, { "epoch": 2.08, "learning_rate": 0.000175, "loss": 0.0194, "step": 6500 }, { "epoch": 2.24, "learning_rate": 0.0001653846153846154, "loss": 0.017, "step": 7000 }, { "epoch": 2.4, "learning_rate": 0.00015576923076923076, "loss": 0.0165, "step": 7500 }, { "epoch": 2.56, "learning_rate": 0.00014615384615384615, "loss": 0.015, "step": 8000 }, { "epoch": 2.72, "learning_rate": 0.00013653846153846152, "loss": 0.0156, "step": 8500 }, { "epoch": 2.88, "learning_rate": 0.0001269230769230769, "loss": 0.014, "step": 9000 }, { "epoch": 3.04, "learning_rate": 0.0001173076923076923, "loss": 0.0134, "step": 9500 }, { "epoch": 3.21, "learning_rate": 0.00010769230769230768, "loss": 0.0113, "step": 10000 }, { "epoch": 3.37, "learning_rate": 9.807692307692307e-05, "loss": 0.0103, "step": 10500 }, { "epoch": 3.53, "learning_rate": 8.846153846153845e-05, "loss": 0.0103, "step": 11000 }, { "epoch": 3.69, "learning_rate": 7.884615384615384e-05, "loss": 0.0098, "step": 11500 }, { "epoch": 3.85, "learning_rate": 6.923076923076922e-05, "loss": 0.0099, "step": 12000 }, { "epoch": 4.01, "learning_rate": 5.961538461538461e-05, "loss": 0.0084, "step": 12500 }, { "epoch": 4.17, "learning_rate": 4.9999999999999996e-05, "loss": 0.0065, "step": 13000 }, { "epoch": 4.33, "learning_rate": 4.038461538461538e-05, "loss": 0.0066, "step": 13500 }, { "epoch": 4.49, "learning_rate": 3.076923076923077e-05, "loss": 0.006, "step": 14000 }, { "epoch": 4.65, "learning_rate": 2.115384615384615e-05, "loss": 0.0065, "step": 14500 }, { "epoch": 4.81, "learning_rate": 1.1538461538461538e-05, "loss": 0.0056, "step": 15000 }, { "epoch": 4.97, "learning_rate": 1.923076923076923e-06, "loss": 0.006, "step": 15500 }, { "epoch": 5.0, "step": 15600, "total_flos": 5.943712364448768e+16, "train_loss": 0.020643039299891544, "train_runtime": 4743.0348, "train_samples_per_second": 105.227, "train_steps_per_second": 3.289 } ], "logging_steps": 500, "max_steps": 15600, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5.943712364448768e+16, "trial_name": null, "trial_params": null }