{ "best_metric": 0.8367001414299011, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 1.0, "eval_steps": 50, "global_step": 162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006172839506172839, "eval_loss": 1.9562022686004639, "eval_runtime": 19.9289, "eval_samples_per_second": 13.699, "eval_steps_per_second": 3.462, "step": 1 }, { "epoch": 0.018518518518518517, "grad_norm": 0.5035348534584045, "learning_rate": 3e-05, "loss": 1.3592, "step": 3 }, { "epoch": 0.037037037037037035, "grad_norm": 0.5112131834030151, "learning_rate": 6e-05, "loss": 1.2165, "step": 6 }, { "epoch": 0.05555555555555555, "grad_norm": 0.600572407245636, "learning_rate": 9e-05, "loss": 1.2194, "step": 9 }, { "epoch": 0.07407407407407407, "grad_norm": 0.5512685179710388, "learning_rate": 9.995728791936504e-05, "loss": 1.1536, "step": 12 }, { "epoch": 0.09259259259259259, "grad_norm": 4.089280605316162, "learning_rate": 9.973324900566213e-05, "loss": 1.4903, "step": 15 }, { "epoch": 0.1111111111111111, "grad_norm": 2.117110252380371, "learning_rate": 9.931806517013612e-05, "loss": 1.1609, "step": 18 }, { "epoch": 0.12962962962962962, "grad_norm": 2.3771092891693115, "learning_rate": 9.871333213161438e-05, "loss": 1.062, "step": 21 }, { "epoch": 0.14814814814814814, "grad_norm": 1.8321698904037476, "learning_rate": 9.792137412291265e-05, "loss": 1.112, "step": 24 }, { "epoch": 0.16666666666666666, "grad_norm": 1.5089828968048096, "learning_rate": 9.694523495787149e-05, "loss": 0.9636, "step": 27 }, { "epoch": 0.18518518518518517, "grad_norm": 1.330153465270996, "learning_rate": 9.578866633275288e-05, "loss": 0.8645, "step": 30 }, { "epoch": 0.2037037037037037, "grad_norm": 1.6392351388931274, "learning_rate": 9.445611340695926e-05, "loss": 0.994, "step": 33 }, { "epoch": 0.2222222222222222, "grad_norm": 2.0593364238739014, "learning_rate": 9.295269771849427e-05, "loss": 0.9373, "step": 36 }, { "epoch": 0.24074074074074073, "grad_norm": 1.6057007312774658, "learning_rate": 9.12841974998278e-05, "loss": 0.8773, "step": 39 }, { "epoch": 0.25925925925925924, "grad_norm": 0.8941075801849365, "learning_rate": 8.945702546981969e-05, "loss": 1.0678, "step": 42 }, { "epoch": 0.2777777777777778, "grad_norm": 0.4377215504646301, "learning_rate": 8.74782041870563e-05, "loss": 1.0822, "step": 45 }, { "epoch": 0.2962962962962963, "grad_norm": 0.44285309314727783, "learning_rate": 8.535533905932738e-05, "loss": 1.0699, "step": 48 }, { "epoch": 0.30864197530864196, "eval_loss": 1.018227458000183, "eval_runtime": 20.0515, "eval_samples_per_second": 13.615, "eval_steps_per_second": 3.441, "step": 50 }, { "epoch": 0.3148148148148148, "grad_norm": 0.3906916379928589, "learning_rate": 8.309658911297834e-05, "loss": 1.0028, "step": 51 }, { "epoch": 0.3333333333333333, "grad_norm": 0.8289111852645874, "learning_rate": 8.07106356344834e-05, "loss": 1.151, "step": 54 }, { "epoch": 0.35185185185185186, "grad_norm": 1.4784194231033325, "learning_rate": 7.820664880476256e-05, "loss": 0.8767, "step": 57 }, { "epoch": 0.37037037037037035, "grad_norm": 1.4373376369476318, "learning_rate": 7.559425245448006e-05, "loss": 0.9452, "step": 60 }, { "epoch": 0.3888888888888889, "grad_norm": 1.172136664390564, "learning_rate": 7.288348707578408e-05, "loss": 0.8619, "step": 63 }, { "epoch": 0.4074074074074074, "grad_norm": 1.534406304359436, "learning_rate": 7.008477123264848e-05, "loss": 0.7924, "step": 66 }, { "epoch": 0.42592592592592593, "grad_norm": 1.261856198310852, "learning_rate": 6.720886151813194e-05, "loss": 0.8172, "step": 69 }, { "epoch": 0.4444444444444444, "grad_norm": 1.2206156253814697, "learning_rate": 6.426681121245527e-05, "loss": 0.8421, "step": 72 }, { "epoch": 0.46296296296296297, "grad_norm": 1.6020294427871704, "learning_rate": 6.126992780079031e-05, "loss": 0.9739, "step": 75 }, { "epoch": 0.48148148148148145, "grad_norm": 1.7759839296340942, "learning_rate": 5.8229729514036705e-05, "loss": 0.8757, "step": 78 }, { "epoch": 0.5, "grad_norm": 0.49982303380966187, "learning_rate": 5.515790105961786e-05, "loss": 0.8141, "step": 81 }, { "epoch": 0.5185185185185185, "grad_norm": 0.46078914403915405, "learning_rate": 5.2066248712440656e-05, "loss": 1.0369, "step": 84 }, { "epoch": 0.5370370370370371, "grad_norm": 0.5604984164237976, "learning_rate": 4.8966654938622295e-05, "loss": 1.0657, "step": 87 }, { "epoch": 0.5555555555555556, "grad_norm": 0.4341430962085724, "learning_rate": 4.5871032726383386e-05, "loss": 1.0451, "step": 90 }, { "epoch": 0.5740740740740741, "grad_norm": 0.4503065049648285, "learning_rate": 4.2791279799632666e-05, "loss": 1.0712, "step": 93 }, { "epoch": 0.5925925925925926, "grad_norm": 1.6895031929016113, "learning_rate": 3.973923289021829e-05, "loss": 1.0168, "step": 96 }, { "epoch": 0.6111111111111112, "grad_norm": 1.4327552318572998, "learning_rate": 3.67266222445964e-05, "loss": 0.8845, "step": 99 }, { "epoch": 0.6172839506172839, "eval_loss": 0.884192168712616, "eval_runtime": 20.2826, "eval_samples_per_second": 13.46, "eval_steps_per_second": 3.402, "step": 100 }, { "epoch": 0.6296296296296297, "grad_norm": 1.1935515403747559, "learning_rate": 3.3765026539765834e-05, "loss": 0.8139, "step": 102 }, { "epoch": 0.6481481481481481, "grad_norm": 1.2995893955230713, "learning_rate": 3.086582838174551e-05, "loss": 0.893, "step": 105 }, { "epoch": 0.6666666666666666, "grad_norm": 1.3577182292938232, "learning_rate": 2.804017055763149e-05, "loss": 0.9254, "step": 108 }, { "epoch": 0.6851851851851852, "grad_norm": 1.3016126155853271, "learning_rate": 2.529891320937481e-05, "loss": 0.8247, "step": 111 }, { "epoch": 0.7037037037037037, "grad_norm": 1.1968082189559937, "learning_rate": 2.2652592093878666e-05, "loss": 0.8928, "step": 114 }, { "epoch": 0.7222222222222222, "grad_norm": 1.577446699142456, "learning_rate": 2.0111378089837956e-05, "loss": 0.8642, "step": 117 }, { "epoch": 0.7407407407407407, "grad_norm": 1.3656805753707886, "learning_rate": 1.768503810695295e-05, "loss": 0.678, "step": 120 }, { "epoch": 0.7592592592592593, "grad_norm": 0.3362742066383362, "learning_rate": 1.5382897547758514e-05, "loss": 1.044, "step": 123 }, { "epoch": 0.7777777777777778, "grad_norm": 0.31672272086143494, "learning_rate": 1.3213804466343421e-05, "loss": 0.9984, "step": 126 }, { "epoch": 0.7962962962962963, "grad_norm": 0.3221389055252075, "learning_rate": 1.118609556171213e-05, "loss": 1.0372, "step": 129 }, { "epoch": 0.8148148148148148, "grad_norm": 0.37123388051986694, "learning_rate": 9.307564136490254e-06, "loss": 1.0451, "step": 132 }, { "epoch": 0.8333333333333334, "grad_norm": 1.3557378053665161, "learning_rate": 7.585430144121319e-06, "loss": 0.9801, "step": 135 }, { "epoch": 0.8518518518518519, "grad_norm": 1.2644445896148682, "learning_rate": 6.026312439675552e-06, "loss": 0.8632, "step": 138 }, { "epoch": 0.8703703703703703, "grad_norm": 1.4030702114105225, "learning_rate": 4.636203340922008e-06, "loss": 0.9115, "step": 141 }, { "epoch": 0.8888888888888888, "grad_norm": 1.1068618297576904, "learning_rate": 3.420445597436056e-06, "loss": 0.7584, "step": 144 }, { "epoch": 0.9074074074074074, "grad_norm": 1.4436562061309814, "learning_rate": 2.3837118562592797e-06, "loss": 0.9516, "step": 147 }, { "epoch": 0.9259259259259259, "grad_norm": 1.3743484020233154, "learning_rate": 1.5299867030334814e-06, "loss": 0.8574, "step": 150 }, { "epoch": 0.9259259259259259, "eval_loss": 0.8367001414299011, "eval_runtime": 20.0366, "eval_samples_per_second": 13.625, "eval_steps_per_second": 3.444, "step": 150 }, { "epoch": 0.9444444444444444, "grad_norm": 1.1355260610580444, "learning_rate": 8.62551347632029e-07, "loss": 0.6921, "step": 153 }, { "epoch": 0.9629629629629629, "grad_norm": 1.2376205921173096, "learning_rate": 3.839710131477492e-07, "loss": 0.7755, "step": 156 }, { "epoch": 0.9814814814814815, "grad_norm": 1.5938589572906494, "learning_rate": 9.60850767065924e-08, "loss": 0.7325, "step": 159 }, { "epoch": 1.0, "grad_norm": 1.6113556623458862, "learning_rate": 0.0, "loss": 0.7744, "step": 162 } ], "logging_steps": 3, "max_steps": 162, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1994758268256256e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }