{ "best_metric": 0.8460751725901304, "best_model_checkpoint": "./ner_models/checkpoint-5500", "epoch": 6.053935057787562, "eval_steps": 500, "global_step": 5500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.550357732526142, "grad_norm": 0.050229188054800034, "learning_rate": 9.816446402349487e-06, "loss": 0.0026, "step": 500 }, { "epoch": 0.550357732526142, "eval_accuracy": 0.9978579452614379, "eval_f1": 0.8398829963118403, "eval_loss": 0.008182315155863762, "eval_precision": 0.8228258160976826, "eval_recall": 0.8576623376623377, "eval_runtime": 73.1795, "eval_samples_per_second": 20.907, "eval_steps_per_second": 2.624, "step": 500 }, { "epoch": 1.100715465052284, "grad_norm": 0.042086161673069, "learning_rate": 9.632892804698973e-06, "loss": 0.0021, "step": 1000 }, { "epoch": 1.100715465052284, "eval_accuracy": 0.9978592218137254, "eval_f1": 0.8371327254305978, "eval_loss": 0.008798174560070038, "eval_precision": 0.8168561542263965, "eval_recall": 0.8584415584415584, "eval_runtime": 73.2746, "eval_samples_per_second": 20.88, "eval_steps_per_second": 2.62, "step": 1000 }, { "epoch": 1.6510731975784259, "grad_norm": 0.038026291877031326, "learning_rate": 9.449339207048459e-06, "loss": 0.0018, "step": 1500 }, { "epoch": 1.6510731975784259, "eval_accuracy": 0.9978387969771242, "eval_f1": 0.8427194317605277, "eval_loss": 0.009040649980306625, "eval_precision": 0.8235002478929102, "eval_recall": 0.8628571428571429, "eval_runtime": 73.0885, "eval_samples_per_second": 20.934, "eval_steps_per_second": 2.627, "step": 1500 }, { "epoch": 2.201430930104568, "grad_norm": 0.022481152787804604, "learning_rate": 9.265785609397945e-06, "loss": 0.0017, "step": 2000 }, { "epoch": 2.201430930104568, "eval_accuracy": 0.9978349673202614, "eval_f1": 0.839443096180866, "eval_loss": 0.009410908445715904, "eval_precision": 0.8258356370947474, "eval_recall": 0.8535064935064935, "eval_runtime": 73.3567, "eval_samples_per_second": 20.857, "eval_steps_per_second": 2.617, "step": 2000 }, { "epoch": 2.7517886626307098, "grad_norm": 0.04910328611731529, "learning_rate": 9.08223201174743e-06, "loss": 0.0014, "step": 2500 }, { "epoch": 2.7517886626307098, "eval_accuracy": 0.9978770935457516, "eval_f1": 0.8429856798884806, "eval_loss": 0.009695506654679775, "eval_precision": 0.8230635981192774, "eval_recall": 0.8638961038961039, "eval_runtime": 72.9707, "eval_samples_per_second": 20.967, "eval_steps_per_second": 2.631, "step": 2500 }, { "epoch": 3.3021463951568517, "grad_norm": 0.028137343004345894, "learning_rate": 8.898678414096917e-06, "loss": 0.0013, "step": 3000 }, { "epoch": 3.3021463951568517, "eval_accuracy": 0.997820925245098, "eval_f1": 0.8397394968714085, "eval_loss": 0.010312107391655445, "eval_precision": 0.825923134890731, "eval_recall": 0.854025974025974, "eval_runtime": 72.7223, "eval_samples_per_second": 21.039, "eval_steps_per_second": 2.64, "step": 3000 }, { "epoch": 3.852504127682994, "grad_norm": 0.08884348720312119, "learning_rate": 8.715124816446402e-06, "loss": 0.0011, "step": 3500 }, { "epoch": 3.852504127682994, "eval_accuracy": 0.997820925245098, "eval_f1": 0.840376782077393, "eval_loss": 0.01055143028497696, "eval_precision": 0.8240139790314528, "eval_recall": 0.8574025974025974, "eval_runtime": 73.053, "eval_samples_per_second": 20.944, "eval_steps_per_second": 2.628, "step": 3500 }, { "epoch": 4.402861860209136, "grad_norm": 0.02799968421459198, "learning_rate": 8.531571218795888e-06, "loss": 0.001, "step": 4000 }, { "epoch": 4.402861860209136, "eval_accuracy": 0.9978222017973856, "eval_f1": 0.8401733367320928, "eval_loss": 0.011290175840258598, "eval_precision": 0.8248248248248248, "eval_recall": 0.8561038961038961, "eval_runtime": 72.7847, "eval_samples_per_second": 21.021, "eval_steps_per_second": 2.638, "step": 4000 }, { "epoch": 4.953219592735278, "grad_norm": 0.041495781391859055, "learning_rate": 8.348017621145376e-06, "loss": 0.0009, "step": 4500 }, { "epoch": 4.953219592735278, "eval_accuracy": 0.9977736928104575, "eval_f1": 0.8377070063694267, "eval_loss": 0.011407433077692986, "eval_precision": 0.822, "eval_recall": 0.854025974025974, "eval_runtime": 73.0522, "eval_samples_per_second": 20.944, "eval_steps_per_second": 2.628, "step": 4500 }, { "epoch": 5.5035773252614195, "grad_norm": 0.06250575184822083, "learning_rate": 8.164464023494862e-06, "loss": 0.0012, "step": 5000 }, { "epoch": 5.5035773252614195, "eval_accuracy": 0.9977622038398692, "eval_f1": 0.8407464212678937, "eval_loss": 0.01091926172375679, "eval_precision": 0.8276295923502768, "eval_recall": 0.8542857142857143, "eval_runtime": 75.4389, "eval_samples_per_second": 20.281, "eval_steps_per_second": 2.545, "step": 5000 }, { "epoch": 6.053935057787562, "grad_norm": 0.05719891935586929, "learning_rate": 7.980910425844346e-06, "loss": 0.0014, "step": 5500 }, { "epoch": 6.053935057787562, "eval_accuracy": 0.9978017769607843, "eval_f1": 0.8460751725901304, "eval_loss": 0.010837742127478123, "eval_precision": 0.8330815709969789, "eval_recall": 0.8594805194805195, "eval_runtime": 76.2331, "eval_samples_per_second": 20.07, "eval_steps_per_second": 2.519, "step": 5500 } ], "logging_steps": 500, "max_steps": 27240, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.597283514855014e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }