{ "best_metric": 2.168490409851074, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.009116462812428778, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030388209374762593, "eval_loss": 3.856945514678955, "eval_runtime": 179.2781, "eval_samples_per_second": 7.731, "eval_steps_per_second": 3.866, "step": 1 }, { "epoch": 0.0009116462812428777, "grad_norm": 3.076671838760376, "learning_rate": 0.00012, "loss": 3.565, "step": 3 }, { "epoch": 0.0015194104687381296, "eval_loss": 3.1825942993164062, "eval_runtime": 181.5578, "eval_samples_per_second": 7.634, "eval_steps_per_second": 3.817, "step": 5 }, { "epoch": 0.0018232925624857555, "grad_norm": 5.403068542480469, "learning_rate": 0.0001992114701314478, "loss": 3.1723, "step": 6 }, { "epoch": 0.002734938843728633, "grad_norm": 7.90291166305542, "learning_rate": 0.00018763066800438636, "loss": 2.582, "step": 9 }, { "epoch": 0.003038820937476259, "eval_loss": 2.435781478881836, "eval_runtime": 180.493, "eval_samples_per_second": 7.679, "eval_steps_per_second": 3.839, "step": 10 }, { "epoch": 0.003646585124971511, "grad_norm": 3.3128092288970947, "learning_rate": 0.000163742398974869, "loss": 2.0211, "step": 12 }, { "epoch": 0.004558231406214389, "grad_norm": 2.446349859237671, "learning_rate": 0.00013090169943749476, "loss": 2.2164, "step": 15 }, { "epoch": 0.004558231406214389, "eval_loss": 2.297067403793335, "eval_runtime": 181.3469, "eval_samples_per_second": 7.643, "eval_steps_per_second": 3.821, "step": 15 }, { "epoch": 0.005469877687457266, "grad_norm": 3.5906927585601807, "learning_rate": 9.372094804706867e-05, "loss": 2.1964, "step": 18 }, { "epoch": 0.006077641874952518, "eval_loss": 2.206324815750122, "eval_runtime": 181.3223, "eval_samples_per_second": 7.644, "eval_steps_per_second": 3.822, "step": 20 }, { "epoch": 0.0063815239687001445, "grad_norm": 2.3398468494415283, "learning_rate": 5.7422070843492734e-05, "loss": 2.2411, "step": 21 }, { "epoch": 0.007293170249943022, "grad_norm": 2.6244406700134277, "learning_rate": 2.7103137257858868e-05, "loss": 2.0876, "step": 24 }, { "epoch": 0.007597052343690648, "eval_loss": 2.176274538040161, "eval_runtime": 181.6842, "eval_samples_per_second": 7.629, "eval_steps_per_second": 3.814, "step": 25 }, { "epoch": 0.0082048165311859, "grad_norm": 3.688023328781128, "learning_rate": 7.022351411174866e-06, "loss": 2.1679, "step": 27 }, { "epoch": 0.009116462812428778, "grad_norm": 3.1167757511138916, "learning_rate": 0.0, "loss": 2.0348, "step": 30 }, { "epoch": 0.009116462812428778, "eval_loss": 2.168490409851074, "eval_runtime": 181.2847, "eval_samples_per_second": 7.645, "eval_steps_per_second": 3.823, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.112830925340672e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }