{ "best_metric": 2.080679416656494, "best_model_checkpoint": "experiments/checkpoint-30", "epoch": 7.529411764705882, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.75, "learning_rate": 0.0003, "loss": 3.069, "step": 10 }, { "epoch": 0.75, "eval_loss": 2.506248712539673, "eval_runtime": 36.514, "eval_samples_per_second": 5.477, "eval_steps_per_second": 0.685, "step": 10 }, { "epoch": 1.51, "learning_rate": 0.0002666666666666666, "loss": 2.2982, "step": 20 }, { "epoch": 1.51, "eval_loss": 2.081218957901001, "eval_runtime": 36.5059, "eval_samples_per_second": 5.479, "eval_steps_per_second": 0.685, "step": 20 }, { "epoch": 2.26, "learning_rate": 0.0002333333333333333, "loss": 2.1043, "step": 30 }, { "epoch": 2.26, "eval_loss": 2.080679416656494, "eval_runtime": 36.4738, "eval_samples_per_second": 5.483, "eval_steps_per_second": 0.685, "step": 30 }, { "epoch": 3.01, "learning_rate": 0.00019999999999999998, "loss": 2.1125, "step": 40 }, { "epoch": 3.01, "eval_loss": 2.0814473628997803, "eval_runtime": 36.4244, "eval_samples_per_second": 5.491, "eval_steps_per_second": 0.686, "step": 40 }, { "epoch": 3.76, "learning_rate": 0.00016666666666666666, "loss": 2.1094, "step": 50 }, { "epoch": 3.76, "eval_loss": 2.081106662750244, "eval_runtime": 36.5033, "eval_samples_per_second": 5.479, "eval_steps_per_second": 0.685, "step": 50 }, { "epoch": 4.52, "learning_rate": 0.0001333333333333333, "loss": 2.1128, "step": 60 }, { "epoch": 4.52, "eval_loss": 2.0811569690704346, "eval_runtime": 36.4577, "eval_samples_per_second": 5.486, "eval_steps_per_second": 0.686, "step": 60 }, { "epoch": 5.27, "learning_rate": 9.999999999999999e-05, "loss": 2.1102, "step": 70 }, { "epoch": 5.27, "eval_loss": 2.0808606147766113, "eval_runtime": 36.4061, "eval_samples_per_second": 5.494, "eval_steps_per_second": 0.687, "step": 70 }, { "epoch": 6.02, "learning_rate": 6.666666666666666e-05, "loss": 2.1057, "step": 80 }, { "epoch": 6.02, "eval_loss": 2.081094741821289, "eval_runtime": 36.535, "eval_samples_per_second": 5.474, "eval_steps_per_second": 0.684, "step": 80 }, { "epoch": 6.78, "learning_rate": 3.333333333333333e-05, "loss": 2.1069, "step": 90 }, { "epoch": 6.78, "eval_loss": 2.0808064937591553, "eval_runtime": 36.4018, "eval_samples_per_second": 5.494, "eval_steps_per_second": 0.687, "step": 90 }, { "epoch": 7.53, "learning_rate": 0.0, "loss": 2.1147, "step": 100 }, { "epoch": 7.53, "eval_loss": 2.0808823108673096, "eval_runtime": 36.4455, "eval_samples_per_second": 5.488, "eval_steps_per_second": 0.686, "step": 100 } ], "max_steps": 100, "num_train_epochs": 8, "total_flos": 6.900128905592832e+16, "trial_name": null, "trial_params": null }