{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2, "global_step": 19, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 11935214993408.0, "learning_rate": 1e-05, "loss": 0.7163, "step": 1 }, { "epoch": 0.10526315789473684, "grad_norm": 2096675946496.0, "learning_rate": 9.924038765061042e-06, "loss": 0.6972, "step": 2 }, { "epoch": 0.10526315789473684, "eval_loss": 0.5687638521194458, "eval_runtime": 0.2981, "eval_samples_per_second": 6.709, "eval_steps_per_second": 3.354, "step": 2 }, { "epoch": 0.15789473684210525, "grad_norm": 7390838128640.0, "learning_rate": 9.698463103929542e-06, "loss": 0.7795, "step": 3 }, { "epoch": 0.21052631578947367, "grad_norm": 3194177454080.0, "learning_rate": 9.330127018922195e-06, "loss": 0.6915, "step": 4 }, { "epoch": 0.21052631578947367, "eval_loss": 0.5684303045272827, "eval_runtime": 0.2966, "eval_samples_per_second": 6.743, "eval_steps_per_second": 3.371, "step": 4 }, { "epoch": 0.2631578947368421, "grad_norm": 131253493760.0, "learning_rate": 8.83022221559489e-06, "loss": 0.6744, "step": 5 }, { "epoch": 0.3157894736842105, "grad_norm": 491539005440.0, "learning_rate": 8.213938048432697e-06, "loss": 0.7911, "step": 6 }, { "epoch": 0.3157894736842105, "eval_loss": 0.5686608552932739, "eval_runtime": 0.2967, "eval_samples_per_second": 6.742, "eval_steps_per_second": 3.371, "step": 6 }, { "epoch": 0.3684210526315789, "grad_norm": 235763302400.0, "learning_rate": 7.500000000000001e-06, "loss": 0.5594, "step": 7 }, { "epoch": 0.42105263157894735, "grad_norm": 279532568576.0, "learning_rate": 6.710100716628345e-06, "loss": 0.7261, "step": 8 }, { "epoch": 0.42105263157894735, "eval_loss": 0.5699889063835144, "eval_runtime": 0.2966, "eval_samples_per_second": 6.744, "eval_steps_per_second": 3.372, "step": 8 }, { "epoch": 0.47368421052631576, "grad_norm": 175990996992.0, "learning_rate": 5.8682408883346535e-06, "loss": 0.6122, "step": 9 }, { "epoch": 0.5263157894736842, "grad_norm": 10130321047552.0, "learning_rate": 5e-06, "loss": 0.86, "step": 10 }, { "epoch": 0.5263157894736842, "eval_loss": 0.568690836429596, "eval_runtime": 0.2978, "eval_samples_per_second": 6.715, "eval_steps_per_second": 3.358, "step": 10 }, { "epoch": 0.5789473684210527, "grad_norm": 3898177486848.0, "learning_rate": 4.131759111665349e-06, "loss": 0.7364, "step": 11 }, { "epoch": 0.631578947368421, "grad_norm": 1318930219008.0, "learning_rate": 3.289899283371657e-06, "loss": 0.6903, "step": 12 }, { "epoch": 0.631578947368421, "eval_loss": 0.5691469311714172, "eval_runtime": 0.2973, "eval_samples_per_second": 6.728, "eval_steps_per_second": 3.364, "step": 12 }, { "epoch": 0.6842105263157895, "grad_norm": 137136914432.0, "learning_rate": 2.5000000000000015e-06, "loss": 0.663, "step": 13 }, { "epoch": 0.7368421052631579, "grad_norm": 1604217339904.0, "learning_rate": 1.7860619515673034e-06, "loss": 0.5994, "step": 14 }, { "epoch": 0.7368421052631579, "eval_loss": 0.5684089064598083, "eval_runtime": 0.2979, "eval_samples_per_second": 6.714, "eval_steps_per_second": 3.357, "step": 14 }, { "epoch": 0.7894736842105263, "grad_norm": 73108439040.0, "learning_rate": 1.1697777844051105e-06, "loss": 0.7457, "step": 15 }, { "epoch": 0.8421052631578947, "grad_norm": 361255075840.0, "learning_rate": 6.698729810778065e-07, "loss": 0.7792, "step": 16 }, { "epoch": 0.8421052631578947, "eval_loss": 0.5695986747741699, "eval_runtime": 0.2976, "eval_samples_per_second": 6.72, "eval_steps_per_second": 3.36, "step": 16 }, { "epoch": 0.8947368421052632, "grad_norm": 222161354752.0, "learning_rate": 3.015368960704584e-07, "loss": 0.6121, "step": 17 }, { "epoch": 0.9473684210526315, "grad_norm": 4849853267968.0, "learning_rate": 7.59612349389599e-08, "loss": 0.7023, "step": 18 }, { "epoch": 0.9473684210526315, "eval_loss": 0.5688631534576416, "eval_runtime": 0.298, "eval_samples_per_second": 6.711, "eval_steps_per_second": 3.355, "step": 18 }, { "epoch": 1.0, "grad_norm": 994095661056.0, "learning_rate": 0.0, "loss": 0.7789, "step": 19 }, { "epoch": 1.0, "step": 19, "total_flos": 6341035089199104.0, "train_loss": 0.7060548631768477, "train_runtime": 53.6484, "train_samples_per_second": 2.759, "train_steps_per_second": 0.354 } ], "logging_steps": 1, "max_steps": 19, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 19, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6341035089199104.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }