{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "eval_steps": 500, "global_step": 6800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 14.705882352941176, "grad_norm": 0.8522269129753113, "learning_rate": 0.0009264705882352942, "loss": 1.7538, "step": 500 }, { "epoch": 29.41176470588235, "grad_norm": 0.24410583078861237, "learning_rate": 0.0008529411764705882, "loss": 0.0574, "step": 1000 }, { "epoch": 44.11764705882353, "grad_norm": 0.32126477360725403, "learning_rate": 0.0007794117647058824, "loss": 0.0246, "step": 1500 }, { "epoch": 58.8235294117647, "grad_norm": 0.1633734405040741, "learning_rate": 0.0007058823529411765, "loss": 0.0177, "step": 2000 }, { "epoch": 73.52941176470588, "grad_norm": 1.6365108489990234, "learning_rate": 0.0006323529411764706, "loss": 0.0407, "step": 2500 }, { "epoch": 88.23529411764706, "grad_norm": 0.11134446412324905, "learning_rate": 0.0005588235294117647, "loss": 0.0197, "step": 3000 }, { "epoch": 102.94117647058823, "grad_norm": 1.3109087944030762, "learning_rate": 0.0004852941176470588, "loss": 0.0146, "step": 3500 }, { "epoch": 117.6470588235294, "grad_norm": 0.1169649288058281, "learning_rate": 0.0004117647058823529, "loss": 0.0043, "step": 4000 }, { "epoch": 132.35294117647058, "grad_norm": 0.061263132840394974, "learning_rate": 0.0003382352941176471, "loss": 0.0044, "step": 4500 }, { "epoch": 147.05882352941177, "grad_norm": 0.019590700045228004, "learning_rate": 0.0002647058823529412, "loss": 0.0025, "step": 5000 }, { "epoch": 161.76470588235293, "grad_norm": 0.011288847774267197, "learning_rate": 0.00019117647058823528, "loss": 0.0024, "step": 5500 }, { "epoch": 176.47058823529412, "grad_norm": 0.011768895201385021, "learning_rate": 0.00011764705882352942, "loss": 0.0017, "step": 6000 }, { "epoch": 191.1764705882353, "grad_norm": 0.010613554157316685, "learning_rate": 4.411764705882353e-05, "loss": 0.0015, "step": 6500 }, { "epoch": 200.0, "step": 6800, "total_flos": 1.175134666752e+17, "train_loss": 0.14310694140546462, "train_runtime": 11074.5138, "train_samples_per_second": 2.438, "train_steps_per_second": 0.614 } ], "logging_steps": 500, "max_steps": 6800, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.175134666752e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }