{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 18.864488225514396, "learning_rate": 2.0000000000000003e-06, "loss": 0.9524, "step": 1 }, { "epoch": 1.0, "grad_norm": 16.624666091321274, "learning_rate": 1e-05, "loss": 0.8684, "step": 5 }, { "epoch": 1.0, "eval_loss": 0.7913689017295837, "eval_runtime": 5.7172, "eval_samples_per_second": 54.397, "eval_steps_per_second": 1.749, "step": 5 }, { "epoch": 2.0, "grad_norm": 4.401290119342954, "learning_rate": 9.698463103929542e-06, "loss": 0.7031, "step": 10 }, { "epoch": 2.0, "eval_loss": 0.5348705053329468, "eval_runtime": 5.7154, "eval_samples_per_second": 54.414, "eval_steps_per_second": 1.75, "step": 10 }, { "epoch": 3.0, "grad_norm": 3.029002967994018, "learning_rate": 8.83022221559489e-06, "loss": 0.4888, "step": 15 }, { "epoch": 3.0, "eval_loss": 0.34116050601005554, "eval_runtime": 5.7277, "eval_samples_per_second": 54.298, "eval_steps_per_second": 1.746, "step": 15 }, { "epoch": 4.0, "grad_norm": 3.1549630858744604, "learning_rate": 7.500000000000001e-06, "loss": 0.302, "step": 20 }, { "epoch": 4.0, "eval_loss": 0.19024085998535156, "eval_runtime": 5.7234, "eval_samples_per_second": 54.338, "eval_steps_per_second": 1.747, "step": 20 }, { "epoch": 5.0, "grad_norm": 4.126405291248831, "learning_rate": 5.8682408883346535e-06, "loss": 0.1643, "step": 25 }, { "epoch": 5.0, "eval_loss": 0.08896404504776001, "eval_runtime": 5.7273, "eval_samples_per_second": 54.301, "eval_steps_per_second": 1.746, "step": 25 }, { "epoch": 6.0, "grad_norm": 2.870513503322164, "learning_rate": 4.131759111665349e-06, "loss": 0.0778, "step": 30 }, { "epoch": 6.0, "eval_loss": 0.04157733544707298, "eval_runtime": 5.7247, "eval_samples_per_second": 54.326, "eval_steps_per_second": 1.747, "step": 30 }, { "epoch": 7.0, "grad_norm": 1.7040194155687554, "learning_rate": 2.5000000000000015e-06, "loss": 0.0404, "step": 35 }, { "epoch": 7.0, "eval_loss": 0.027967050671577454, "eval_runtime": 5.7274, "eval_samples_per_second": 54.301, "eval_steps_per_second": 1.746, "step": 35 }, { "epoch": 8.0, "grad_norm": 1.2638859662186757, "learning_rate": 1.1697777844051105e-06, "loss": 0.0279, "step": 40 }, { "epoch": 8.0, "eval_loss": 0.021927356719970703, "eval_runtime": 5.7381, "eval_samples_per_second": 54.199, "eval_steps_per_second": 1.743, "step": 40 }, { "epoch": 9.0, "grad_norm": 0.9658748602996367, "learning_rate": 3.015368960704584e-07, "loss": 0.0214, "step": 45 }, { "epoch": 9.0, "eval_loss": 0.018554512411355972, "eval_runtime": 5.7334, "eval_samples_per_second": 54.244, "eval_steps_per_second": 1.744, "step": 45 }, { "epoch": 10.0, "grad_norm": 0.5265240066937839, "learning_rate": 0.0, "loss": 0.0183, "step": 50 }, { "epoch": 10.0, "eval_loss": 0.017760511487722397, "eval_runtime": 5.6974, "eval_samples_per_second": 54.586, "eval_steps_per_second": 1.755, "step": 50 }, { "epoch": 10.0, "step": 50, "total_flos": 10468982784000.0, "train_loss": 0.2729184678196907, "train_runtime": 352.114, "train_samples_per_second": 8.832, "train_steps_per_second": 0.142 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 10468982784000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }