{ "best_metric": 0.4814574122428894, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0007282419511058354, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9129678044233416e-05, "grad_norm": 1.0450439453125, "learning_rate": 2e-05, "loss": 0.9576, "step": 1 }, { "epoch": 2.9129678044233416e-05, "eval_loss": 1.2448866367340088, "eval_runtime": 2421.1915, "eval_samples_per_second": 5.97, "eval_steps_per_second": 2.985, "step": 1 }, { "epoch": 5.825935608846683e-05, "grad_norm": 1.4700437784194946, "learning_rate": 4e-05, "loss": 1.2243, "step": 2 }, { "epoch": 8.738903413270025e-05, "grad_norm": 1.3991096019744873, "learning_rate": 6e-05, "loss": 1.1542, "step": 3 }, { "epoch": 0.00011651871217693366, "grad_norm": 1.0982633829116821, "learning_rate": 8e-05, "loss": 0.8913, "step": 4 }, { "epoch": 0.00014564839022116708, "grad_norm": 1.0009615421295166, "learning_rate": 0.0001, "loss": 1.0541, "step": 5 }, { "epoch": 0.00014564839022116708, "eval_loss": 1.1605032682418823, "eval_runtime": 2428.4921, "eval_samples_per_second": 5.952, "eval_steps_per_second": 2.976, "step": 5 }, { "epoch": 0.0001747780682654005, "grad_norm": 0.9604702591896057, "learning_rate": 0.00012, "loss": 0.9064, "step": 6 }, { "epoch": 0.0002039077463096339, "grad_norm": 0.8173295855522156, "learning_rate": 0.00014, "loss": 0.774, "step": 7 }, { "epoch": 0.00023303742435386732, "grad_norm": 1.5230978727340698, "learning_rate": 0.00016, "loss": 1.4514, "step": 8 }, { "epoch": 0.00026216710239810074, "grad_norm": 0.7246477007865906, "learning_rate": 0.00018, "loss": 0.5916, "step": 9 }, { "epoch": 0.00029129678044233416, "grad_norm": 1.2219816446304321, "learning_rate": 0.0002, "loss": 1.1294, "step": 10 }, { "epoch": 0.00029129678044233416, "eval_loss": 0.6497195363044739, "eval_runtime": 2396.3032, "eval_samples_per_second": 6.032, "eval_steps_per_second": 3.016, "step": 10 }, { "epoch": 0.00032042645848656757, "grad_norm": 1.3942621946334839, "learning_rate": 0.00019781476007338058, "loss": 0.2656, "step": 11 }, { "epoch": 0.000349556136530801, "grad_norm": 0.9527015686035156, "learning_rate": 0.0001913545457642601, "loss": 0.491, "step": 12 }, { "epoch": 0.0003786858145750344, "grad_norm": 1.4849249124526978, "learning_rate": 0.00018090169943749476, "loss": 0.3274, "step": 13 }, { "epoch": 0.0004078154926192678, "grad_norm": 1.9381086826324463, "learning_rate": 0.00016691306063588583, "loss": 0.5956, "step": 14 }, { "epoch": 0.00043694517066350123, "grad_norm": 1.0866339206695557, "learning_rate": 0.00015000000000000001, "loss": 0.7109, "step": 15 }, { "epoch": 0.00043694517066350123, "eval_loss": 0.5404261946678162, "eval_runtime": 2438.9793, "eval_samples_per_second": 5.927, "eval_steps_per_second": 2.964, "step": 15 }, { "epoch": 0.00046607484870773465, "grad_norm": 1.738878846168518, "learning_rate": 0.00013090169943749476, "loss": 0.8893, "step": 16 }, { "epoch": 0.0004952045267519681, "grad_norm": 1.4717392921447754, "learning_rate": 0.00011045284632676536, "loss": 0.6926, "step": 17 }, { "epoch": 0.0005243342047962015, "grad_norm": 1.363275408744812, "learning_rate": 8.954715367323468e-05, "loss": 0.6695, "step": 18 }, { "epoch": 0.0005534638828404349, "grad_norm": 1.4391145706176758, "learning_rate": 6.909830056250527e-05, "loss": 0.6236, "step": 19 }, { "epoch": 0.0005825935608846683, "grad_norm": 1.6754724979400635, "learning_rate": 5.000000000000002e-05, "loss": 0.4852, "step": 20 }, { "epoch": 0.0005825935608846683, "eval_loss": 0.49102675914764404, "eval_runtime": 2464.1257, "eval_samples_per_second": 5.866, "eval_steps_per_second": 2.933, "step": 20 }, { "epoch": 0.0006117232389289017, "grad_norm": 1.1783339977264404, "learning_rate": 3.308693936411421e-05, "loss": 0.8808, "step": 21 }, { "epoch": 0.0006408529169731351, "grad_norm": 1.586045742034912, "learning_rate": 1.9098300562505266e-05, "loss": 0.7825, "step": 22 }, { "epoch": 0.0006699825950173686, "grad_norm": 2.2189619541168213, "learning_rate": 8.645454235739903e-06, "loss": 0.8943, "step": 23 }, { "epoch": 0.000699112273061602, "grad_norm": 1.2450097799301147, "learning_rate": 2.1852399266194314e-06, "loss": 0.5824, "step": 24 }, { "epoch": 0.0007282419511058354, "grad_norm": 1.3376847505569458, "learning_rate": 0.0, "loss": 0.6038, "step": 25 }, { "epoch": 0.0007282419511058354, "eval_loss": 0.4814574122428894, "eval_runtime": 2432.3948, "eval_samples_per_second": 5.943, "eval_steps_per_second": 2.972, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6380996656103424.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }