{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6525285481239804, "eval_steps": 20, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.969849246231156e-05, "loss": 1.7448, "step": 20 }, { "epoch": 0.04, "eval_loss": 1.6862075328826904, "eval_runtime": 219.9041, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.2, "step": 20 }, { "epoch": 0.09, "learning_rate": 1.9296482412060304e-05, "loss": 1.6491, "step": 40 }, { "epoch": 0.09, "eval_loss": 1.6274302005767822, "eval_runtime": 219.8105, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.2, "step": 40 }, { "epoch": 0.13, "learning_rate": 1.8894472361809046e-05, "loss": 1.6107, "step": 60 }, { "epoch": 0.13, "eval_loss": 1.5971616506576538, "eval_runtime": 219.7599, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 60 }, { "epoch": 0.17, "learning_rate": 1.8492462311557792e-05, "loss": 1.6148, "step": 80 }, { "epoch": 0.17, "eval_loss": 1.5817948579788208, "eval_runtime": 219.7888, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 80 }, { "epoch": 0.22, "learning_rate": 1.8090452261306535e-05, "loss": 1.5727, "step": 100 }, { "epoch": 0.22, "eval_loss": 1.5724695920944214, "eval_runtime": 219.7928, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.2, "step": 100 }, { "epoch": 0.26, "learning_rate": 1.768844221105528e-05, "loss": 1.5578, "step": 120 }, { "epoch": 0.26, "eval_loss": 1.5660758018493652, "eval_runtime": 219.7399, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 120 }, { "epoch": 0.3, "learning_rate": 1.728643216080402e-05, "loss": 1.5592, "step": 140 }, { "epoch": 0.3, "eval_loss": 1.5614327192306519, "eval_runtime": 219.7821, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 140 }, { "epoch": 0.35, "learning_rate": 1.6884422110552766e-05, "loss": 1.5719, "step": 160 }, { "epoch": 0.35, "eval_loss": 1.5577027797698975, "eval_runtime": 219.8353, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.2, "step": 160 }, { "epoch": 0.39, "learning_rate": 1.6482412060301508e-05, "loss": 1.5437, "step": 180 }, { "epoch": 0.39, "eval_loss": 1.5545556545257568, "eval_runtime": 219.7845, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 180 }, { "epoch": 0.44, "learning_rate": 1.6080402010050254e-05, "loss": 1.5645, "step": 200 }, { "epoch": 0.44, "eval_loss": 1.5518943071365356, "eval_runtime": 219.742, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 200 }, { "epoch": 0.48, "learning_rate": 1.5678391959798997e-05, "loss": 1.5687, "step": 220 }, { "epoch": 0.48, "eval_loss": 1.549355149269104, "eval_runtime": 219.7771, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 220 }, { "epoch": 0.52, "learning_rate": 1.527638190954774e-05, "loss": 1.5692, "step": 240 }, { "epoch": 0.52, "eval_loss": 1.5471032857894897, "eval_runtime": 219.7527, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 240 }, { "epoch": 0.57, "learning_rate": 1.4874371859296483e-05, "loss": 1.5821, "step": 260 }, { "epoch": 0.57, "eval_loss": 1.5451416969299316, "eval_runtime": 219.7519, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 260 }, { "epoch": 0.61, "learning_rate": 1.4472361809045228e-05, "loss": 1.54, "step": 280 }, { "epoch": 0.61, "eval_loss": 1.5428489446640015, "eval_runtime": 219.7649, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 280 }, { "epoch": 0.65, "learning_rate": 1.4070351758793972e-05, "loss": 1.5771, "step": 300 }, { "epoch": 0.65, "eval_loss": 1.5406657457351685, "eval_runtime": 219.7434, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.2, "step": 300 } ], "logging_steps": 20, "max_steps": 1000, "num_train_epochs": 3, "save_steps": 20, "total_flos": 2.103288904286208e+17, "trial_name": null, "trial_params": null }