{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0027231632264037905, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.446326452807581e-05, "eval_loss": 1.8793249130249023, "eval_runtime": 621.2907, "eval_samples_per_second": 12.443, "eval_steps_per_second": 6.223, "step": 1 }, { "epoch": 0.0002723163226403791, "grad_norm": 5.56035852432251, "learning_rate": 5e-05, "loss": 1.7265, "step": 5 }, { "epoch": 0.0005446326452807582, "grad_norm": 6.478335857391357, "learning_rate": 0.0001, "loss": 1.3897, "step": 10 }, { "epoch": 0.0005446326452807582, "eval_loss": 1.1497383117675781, "eval_runtime": 623.1647, "eval_samples_per_second": 12.406, "eval_steps_per_second": 6.204, "step": 10 }, { "epoch": 0.0008169489679211372, "grad_norm": 5.012179851531982, "learning_rate": 9.619397662556435e-05, "loss": 0.9838, "step": 15 }, { "epoch": 0.0010892652905615163, "grad_norm": 6.457354545593262, "learning_rate": 8.535533905932738e-05, "loss": 0.5266, "step": 20 }, { "epoch": 0.0010892652905615163, "eval_loss": 0.6337934136390686, "eval_runtime": 620.9628, "eval_samples_per_second": 12.45, "eval_steps_per_second": 6.226, "step": 20 }, { "epoch": 0.0013615816132018952, "grad_norm": 5.459563732147217, "learning_rate": 6.91341716182545e-05, "loss": 0.6118, "step": 25 }, { "epoch": 0.0016338979358422744, "grad_norm": 4.611813068389893, "learning_rate": 5e-05, "loss": 0.5736, "step": 30 }, { "epoch": 0.0016338979358422744, "eval_loss": 0.524707019329071, "eval_runtime": 620.7623, "eval_samples_per_second": 12.454, "eval_steps_per_second": 6.228, "step": 30 }, { "epoch": 0.0019062142584826535, "grad_norm": 3.6113362312316895, "learning_rate": 3.086582838174551e-05, "loss": 0.525, "step": 35 }, { "epoch": 0.0021785305811230326, "grad_norm": 4.558568954467773, "learning_rate": 1.4644660940672627e-05, "loss": 0.4886, "step": 40 }, { "epoch": 0.0021785305811230326, "eval_loss": 0.47880464792251587, "eval_runtime": 622.0545, "eval_samples_per_second": 12.428, "eval_steps_per_second": 6.215, "step": 40 }, { "epoch": 0.0024508469037634118, "grad_norm": 5.0720930099487305, "learning_rate": 3.8060233744356633e-06, "loss": 0.5172, "step": 45 }, { "epoch": 0.0027231632264037905, "grad_norm": 4.553873538970947, "learning_rate": 0.0, "loss": 0.3429, "step": 50 }, { "epoch": 0.0027231632264037905, "eval_loss": 0.4715513586997986, "eval_runtime": 621.6087, "eval_samples_per_second": 12.437, "eval_steps_per_second": 6.219, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9555457081344000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }