{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.949852507374631, "eval_steps": 500, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11345586566825505, "grad_norm": 0.08047471195459366, "learning_rate": 4.8109068905529086e-05, "loss": 0.1422, "step": 500 }, { "epoch": 0.2269117313365101, "grad_norm": 0.029049992561340332, "learning_rate": 4.621813781105816e-05, "loss": 0.0371, "step": 1000 }, { "epoch": 0.34036759700476515, "grad_norm": 0.014394763857126236, "learning_rate": 4.432720671658725e-05, "loss": 0.0314, "step": 1500 }, { "epoch": 0.4538234626730202, "grad_norm": 0.00882632751017809, "learning_rate": 4.2436275622116335e-05, "loss": 0.0201, "step": 2000 }, { "epoch": 0.5672793283412753, "grad_norm": 0.005796346347779036, "learning_rate": 4.054534452764541e-05, "loss": 0.0133, "step": 2500 }, { "epoch": 0.6807351940095303, "grad_norm": 0.004020103719085455, "learning_rate": 3.8654413433174495e-05, "loss": 0.0101, "step": 3000 }, { "epoch": 0.7941910596777854, "grad_norm": 0.0029154920484870672, "learning_rate": 3.676348233870358e-05, "loss": 0.0115, "step": 3500 }, { "epoch": 0.9076469253460404, "grad_norm": 0.002114818897098303, "learning_rate": 3.487255124423266e-05, "loss": 0.0063, "step": 4000 }, { "epoch": 1.0211027910142954, "grad_norm": 0.0016020000912249088, "learning_rate": 3.2981620149761745e-05, "loss": 0.0101, "step": 4500 }, { "epoch": 1.1345586566825505, "grad_norm": 0.0012561387848109007, "learning_rate": 3.109068905529083e-05, "loss": 0.0059, "step": 5000 }, { "epoch": 1.2480145223508055, "grad_norm": 0.0009663666132837534, "learning_rate": 2.9199757960819908e-05, "loss": 0.0031, "step": 5500 }, { "epoch": 1.3614703880190606, "grad_norm": 0.000789017416536808, "learning_rate": 2.730882686634899e-05, "loss": 0.0037, "step": 6000 }, { "epoch": 1.4749262536873156, "grad_norm": 0.0006201081559993327, "learning_rate": 2.5417895771878074e-05, "loss": 0.0049, "step": 6500 }, { "epoch": 1.5883821193555707, "grad_norm": 0.0005116109969094396, "learning_rate": 2.3526964677407158e-05, "loss": 0.0037, "step": 7000 }, { "epoch": 1.7018379850238259, "grad_norm": 0.0004248488985467702, "learning_rate": 2.1636033582936237e-05, "loss": 0.004, "step": 7500 }, { "epoch": 1.8152938506920808, "grad_norm": 0.0003368803591001779, "learning_rate": 1.9745102488465324e-05, "loss": 0.0012, "step": 8000 }, { "epoch": 1.9287497163603358, "grad_norm": 0.0002900149847846478, "learning_rate": 1.7854171393994404e-05, "loss": 0.0035, "step": 8500 }, { "epoch": 2.0422055820285907, "grad_norm": 0.00024262606166303158, "learning_rate": 1.5963240299523484e-05, "loss": 0.0007, "step": 9000 }, { "epoch": 2.155661447696846, "grad_norm": 0.00020890074665658176, "learning_rate": 1.4072309205052569e-05, "loss": 0.0016, "step": 9500 }, { "epoch": 2.269117313365101, "grad_norm": 0.00018598214956000447, "learning_rate": 1.218137811058165e-05, "loss": 0.0024, "step": 10000 }, { "epoch": 2.382573179033356, "grad_norm": 0.0002008028095588088, "learning_rate": 1.0290447016110734e-05, "loss": 0.0057, "step": 10500 }, { "epoch": 2.496029044701611, "grad_norm": 0.00016247628082055598, "learning_rate": 8.399515921639815e-06, "loss": 0.0005, "step": 11000 }, { "epoch": 2.609484910369866, "grad_norm": 0.0001464697706978768, "learning_rate": 6.508584827168898e-06, "loss": 0.0024, "step": 11500 }, { "epoch": 2.7229407760381212, "grad_norm": 0.00013344288163352758, "learning_rate": 4.617653732697981e-06, "loss": 0.001, "step": 12000 }, { "epoch": 2.836396641706376, "grad_norm": 0.0002060175029328093, "learning_rate": 2.726722638227063e-06, "loss": 0.0057, "step": 12500 }, { "epoch": 2.949852507374631, "grad_norm": 0.0001918944326462224, "learning_rate": 8.357915437561455e-07, "loss": 0.0033, "step": 13000 } ], "logging_steps": 500, "max_steps": 13221, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.059186473026089e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }