ner-baseline-dictabert-he / trainer_state.json
maayanorner's picture
Upload 11 files
06440c0 verified
{
"best_metric": 0.8460751725901304,
"best_model_checkpoint": "./ner_models/checkpoint-5500",
"epoch": 6.053935057787562,
"eval_steps": 500,
"global_step": 5500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.550357732526142,
"grad_norm": 0.050229188054800034,
"learning_rate": 9.816446402349487e-06,
"loss": 0.0026,
"step": 500
},
{
"epoch": 0.550357732526142,
"eval_accuracy": 0.9978579452614379,
"eval_f1": 0.8398829963118403,
"eval_loss": 0.008182315155863762,
"eval_precision": 0.8228258160976826,
"eval_recall": 0.8576623376623377,
"eval_runtime": 73.1795,
"eval_samples_per_second": 20.907,
"eval_steps_per_second": 2.624,
"step": 500
},
{
"epoch": 1.100715465052284,
"grad_norm": 0.042086161673069,
"learning_rate": 9.632892804698973e-06,
"loss": 0.0021,
"step": 1000
},
{
"epoch": 1.100715465052284,
"eval_accuracy": 0.9978592218137254,
"eval_f1": 0.8371327254305978,
"eval_loss": 0.008798174560070038,
"eval_precision": 0.8168561542263965,
"eval_recall": 0.8584415584415584,
"eval_runtime": 73.2746,
"eval_samples_per_second": 20.88,
"eval_steps_per_second": 2.62,
"step": 1000
},
{
"epoch": 1.6510731975784259,
"grad_norm": 0.038026291877031326,
"learning_rate": 9.449339207048459e-06,
"loss": 0.0018,
"step": 1500
},
{
"epoch": 1.6510731975784259,
"eval_accuracy": 0.9978387969771242,
"eval_f1": 0.8427194317605277,
"eval_loss": 0.009040649980306625,
"eval_precision": 0.8235002478929102,
"eval_recall": 0.8628571428571429,
"eval_runtime": 73.0885,
"eval_samples_per_second": 20.934,
"eval_steps_per_second": 2.627,
"step": 1500
},
{
"epoch": 2.201430930104568,
"grad_norm": 0.022481152787804604,
"learning_rate": 9.265785609397945e-06,
"loss": 0.0017,
"step": 2000
},
{
"epoch": 2.201430930104568,
"eval_accuracy": 0.9978349673202614,
"eval_f1": 0.839443096180866,
"eval_loss": 0.009410908445715904,
"eval_precision": 0.8258356370947474,
"eval_recall": 0.8535064935064935,
"eval_runtime": 73.3567,
"eval_samples_per_second": 20.857,
"eval_steps_per_second": 2.617,
"step": 2000
},
{
"epoch": 2.7517886626307098,
"grad_norm": 0.04910328611731529,
"learning_rate": 9.08223201174743e-06,
"loss": 0.0014,
"step": 2500
},
{
"epoch": 2.7517886626307098,
"eval_accuracy": 0.9978770935457516,
"eval_f1": 0.8429856798884806,
"eval_loss": 0.009695506654679775,
"eval_precision": 0.8230635981192774,
"eval_recall": 0.8638961038961039,
"eval_runtime": 72.9707,
"eval_samples_per_second": 20.967,
"eval_steps_per_second": 2.631,
"step": 2500
},
{
"epoch": 3.3021463951568517,
"grad_norm": 0.028137343004345894,
"learning_rate": 8.898678414096917e-06,
"loss": 0.0013,
"step": 3000
},
{
"epoch": 3.3021463951568517,
"eval_accuracy": 0.997820925245098,
"eval_f1": 0.8397394968714085,
"eval_loss": 0.010312107391655445,
"eval_precision": 0.825923134890731,
"eval_recall": 0.854025974025974,
"eval_runtime": 72.7223,
"eval_samples_per_second": 21.039,
"eval_steps_per_second": 2.64,
"step": 3000
},
{
"epoch": 3.852504127682994,
"grad_norm": 0.08884348720312119,
"learning_rate": 8.715124816446402e-06,
"loss": 0.0011,
"step": 3500
},
{
"epoch": 3.852504127682994,
"eval_accuracy": 0.997820925245098,
"eval_f1": 0.840376782077393,
"eval_loss": 0.01055143028497696,
"eval_precision": 0.8240139790314528,
"eval_recall": 0.8574025974025974,
"eval_runtime": 73.053,
"eval_samples_per_second": 20.944,
"eval_steps_per_second": 2.628,
"step": 3500
},
{
"epoch": 4.402861860209136,
"grad_norm": 0.02799968421459198,
"learning_rate": 8.531571218795888e-06,
"loss": 0.001,
"step": 4000
},
{
"epoch": 4.402861860209136,
"eval_accuracy": 0.9978222017973856,
"eval_f1": 0.8401733367320928,
"eval_loss": 0.011290175840258598,
"eval_precision": 0.8248248248248248,
"eval_recall": 0.8561038961038961,
"eval_runtime": 72.7847,
"eval_samples_per_second": 21.021,
"eval_steps_per_second": 2.638,
"step": 4000
},
{
"epoch": 4.953219592735278,
"grad_norm": 0.041495781391859055,
"learning_rate": 8.348017621145376e-06,
"loss": 0.0009,
"step": 4500
},
{
"epoch": 4.953219592735278,
"eval_accuracy": 0.9977736928104575,
"eval_f1": 0.8377070063694267,
"eval_loss": 0.011407433077692986,
"eval_precision": 0.822,
"eval_recall": 0.854025974025974,
"eval_runtime": 73.0522,
"eval_samples_per_second": 20.944,
"eval_steps_per_second": 2.628,
"step": 4500
},
{
"epoch": 5.5035773252614195,
"grad_norm": 0.06250575184822083,
"learning_rate": 8.164464023494862e-06,
"loss": 0.0012,
"step": 5000
},
{
"epoch": 5.5035773252614195,
"eval_accuracy": 0.9977622038398692,
"eval_f1": 0.8407464212678937,
"eval_loss": 0.01091926172375679,
"eval_precision": 0.8276295923502768,
"eval_recall": 0.8542857142857143,
"eval_runtime": 75.4389,
"eval_samples_per_second": 20.281,
"eval_steps_per_second": 2.545,
"step": 5000
},
{
"epoch": 6.053935057787562,
"grad_norm": 0.05719891935586929,
"learning_rate": 7.980910425844346e-06,
"loss": 0.0014,
"step": 5500
},
{
"epoch": 6.053935057787562,
"eval_accuracy": 0.9978017769607843,
"eval_f1": 0.8460751725901304,
"eval_loss": 0.010837742127478123,
"eval_precision": 0.8330815709969789,
"eval_recall": 0.8594805194805195,
"eval_runtime": 76.2331,
"eval_samples_per_second": 20.07,
"eval_steps_per_second": 2.519,
"step": 5500
}
],
"logging_steps": 500,
"max_steps": 27240,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.597283514855014e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}