{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 76, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013157894736842105, "grad_norm": 2.890625, "learning_rate": 2.5e-06, "loss": 3.1602, "step": 1 }, { "epoch": 0.06578947368421052, "grad_norm": 2.859375, "learning_rate": 1.25e-05, "loss": 3.2031, "step": 5 }, { "epoch": 0.13157894736842105, "grad_norm": 2.953125, "learning_rate": 1.9957341762950346e-05, "loss": 3.2461, "step": 10 }, { "epoch": 0.19736842105263158, "grad_norm": 2.46875, "learning_rate": 1.948160647590966e-05, "loss": 3.2055, "step": 15 }, { "epoch": 0.2631578947368421, "grad_norm": 2.4375, "learning_rate": 1.8502171357296144e-05, "loss": 3.168, "step": 20 }, { "epoch": 0.32894736842105265, "grad_norm": 2.234375, "learning_rate": 1.7071067811865477e-05, "loss": 3.15, "step": 25 }, { "epoch": 0.39473684210526316, "grad_norm": 2.140625, "learning_rate": 1.526432162877356e-05, "loss": 3.1352, "step": 30 }, { "epoch": 0.4605263157894737, "grad_norm": 2.046875, "learning_rate": 1.3177914195819018e-05, "loss": 3.1156, "step": 35 }, { "epoch": 0.5263157894736842, "grad_norm": 1.9296875, "learning_rate": 1.092268359463302e-05, "loss": 3.1227, "step": 40 }, { "epoch": 0.5921052631578947, "grad_norm": 1.890625, "learning_rate": 8.618436450481182e-06, "loss": 3.1273, "step": 45 }, { "epoch": 0.6578947368421053, "grad_norm": 2.015625, "learning_rate": 6.387583338128471e-06, "loss": 3.1133, "step": 50 }, { "epoch": 0.7236842105263158, "grad_norm": 1.9765625, "learning_rate": 4.348635855774082e-06, "loss": 3.125, "step": 55 }, { "epoch": 0.7894736842105263, "grad_norm": 2.03125, "learning_rate": 2.6099108277934105e-06, "loss": 3.1, "step": 60 }, { "epoch": 0.8552631578947368, "grad_norm": 2.09375, "learning_rate": 1.2637760935363053e-06, "loss": 3.1195, "step": 65 }, { "epoch": 0.9210526315789473, "grad_norm": 1.953125, "learning_rate": 3.817435682718096e-07, "loss": 3.1187, "step": 70 }, { "epoch": 0.9868421052631579, "grad_norm": 1.9609375, "learning_rate": 1.0670251976275803e-08, "loss": 3.1156, "step": 75 }, { "epoch": 1.0, "eval_loss": 2.950244188308716, "eval_runtime": 862.1559, "eval_samples_per_second": 30.726, "eval_steps_per_second": 0.96, "step": 76 }, { "epoch": 1.0, "step": 76, "total_flos": 317731110912000.0, "train_loss": 3.1433490953947367, "train_runtime": 1149.309, "train_samples_per_second": 2.11, "train_steps_per_second": 0.066 } ], "logging_steps": 5, "max_steps": 76, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 317731110912000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }