|
{ |
|
"best_metric": 0.9280074314909428, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4480", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 4480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9985976613735942, |
|
"eval_f1": 0.9184549356223176, |
|
"eval_loss": 0.005405870731920004, |
|
"eval_precision": 0.9404296875, |
|
"eval_recall": 0.8974836905871388, |
|
"eval_runtime": 15.084, |
|
"eval_samples_per_second": 460.487, |
|
"eval_steps_per_second": 57.611, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1160714285714286, |
|
"grad_norm": 0.0893813744187355, |
|
"learning_rate": 4.4419642857142854e-05, |
|
"loss": 0.016, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9985627773281612, |
|
"eval_f1": 0.9166278528178854, |
|
"eval_loss": 0.004539168905466795, |
|
"eval_precision": 0.9162011173184358, |
|
"eval_recall": 0.9170549860205033, |
|
"eval_runtime": 15.2132, |
|
"eval_samples_per_second": 456.577, |
|
"eval_steps_per_second": 57.121, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 0.01820996217429638, |
|
"learning_rate": 3.883928571428572e-05, |
|
"loss": 0.0039, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.998548823709988, |
|
"eval_f1": 0.9221611721611722, |
|
"eval_loss": 0.005798548460006714, |
|
"eval_precision": 0.9063906390639064, |
|
"eval_recall": 0.9384902143522833, |
|
"eval_runtime": 15.0713, |
|
"eval_samples_per_second": 460.875, |
|
"eval_steps_per_second": 57.659, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 3.3482142857142856, |
|
"grad_norm": 0.09450385719537735, |
|
"learning_rate": 3.325892857142857e-05, |
|
"loss": 0.0022, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.998513939664555, |
|
"eval_f1": 0.9151459854014599, |
|
"eval_loss": 0.005682563409209251, |
|
"eval_precision": 0.8963360142984808, |
|
"eval_recall": 0.934762348555452, |
|
"eval_runtime": 15.3704, |
|
"eval_samples_per_second": 451.907, |
|
"eval_steps_per_second": 56.537, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 4.464285714285714, |
|
"grad_norm": 0.08753237873315811, |
|
"learning_rate": 2.767857142857143e-05, |
|
"loss": 0.0017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9987023135098931, |
|
"eval_f1": 0.9271217712177122, |
|
"eval_loss": 0.005979395937174559, |
|
"eval_precision": 0.9178082191780822, |
|
"eval_recall": 0.9366262814538676, |
|
"eval_runtime": 15.1424, |
|
"eval_samples_per_second": 458.713, |
|
"eval_steps_per_second": 57.389, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 5.580357142857143, |
|
"grad_norm": 0.010990791022777557, |
|
"learning_rate": 2.2098214285714286e-05, |
|
"loss": 0.0012, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9987092903189797, |
|
"eval_f1": 0.9254426840633737, |
|
"eval_loss": 0.006339639890938997, |
|
"eval_precision": 0.9254426840633737, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 15.1172, |
|
"eval_samples_per_second": 459.478, |
|
"eval_steps_per_second": 57.484, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 6.696428571428571, |
|
"grad_norm": 0.0010647091548889875, |
|
"learning_rate": 1.6517857142857144e-05, |
|
"loss": 0.0008, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9986116149917673, |
|
"eval_f1": 0.9260450160771704, |
|
"eval_loss": 0.0069307987578213215, |
|
"eval_precision": 0.9130434782608695, |
|
"eval_recall": 0.9394221808014911, |
|
"eval_runtime": 15.1162, |
|
"eval_samples_per_second": 459.508, |
|
"eval_steps_per_second": 57.488, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 0.0007633898057974875, |
|
"learning_rate": 1.09375e-05, |
|
"loss": 0.0005, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9986395222281137, |
|
"eval_f1": 0.9252900232018563, |
|
"eval_loss": 0.006891186349093914, |
|
"eval_precision": 0.9214417744916821, |
|
"eval_recall": 0.9291705498602051, |
|
"eval_runtime": 15.1611, |
|
"eval_samples_per_second": 458.146, |
|
"eval_steps_per_second": 57.318, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"grad_norm": 0.0015532037941738963, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.0004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9986534758462869, |
|
"eval_f1": 0.9270106927010694, |
|
"eval_loss": 0.0076610674150288105, |
|
"eval_precision": 0.924860853432282, |
|
"eval_recall": 0.9291705498602051, |
|
"eval_runtime": 15.344, |
|
"eval_samples_per_second": 452.684, |
|
"eval_steps_per_second": 56.634, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9986883598917199, |
|
"eval_f1": 0.9280074314909428, |
|
"eval_loss": 0.007708101533353329, |
|
"eval_precision": 0.925, |
|
"eval_recall": 0.9310344827586207, |
|
"eval_runtime": 15.1825, |
|
"eval_samples_per_second": 457.501, |
|
"eval_steps_per_second": 57.237, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 4480, |
|
"total_flos": 1.178126279062056e+16, |
|
"train_loss": 0.0030171065125614406, |
|
"train_runtime": 1889.6824, |
|
"train_samples_per_second": 151.708, |
|
"train_steps_per_second": 2.371 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.178126279062056e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|