|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.197399616241455, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2624, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_LOCATION_f1": 0.8585365853658538, |
|
"eval_LOCATION_number": 93, |
|
"eval_LOCATION_precision": 0.7857142857142857, |
|
"eval_LOCATION_recall": 0.946236559139785, |
|
"eval_ORGANIZATION_f1": 0.851963746223565, |
|
"eval_ORGANIZATION_number": 166, |
|
"eval_ORGANIZATION_precision": 0.8545454545454545, |
|
"eval_ORGANIZATION_recall": 0.8493975903614458, |
|
"eval_PERSON_f1": 0.9823321554770318, |
|
"eval_PERSON_number": 142, |
|
"eval_PERSON_precision": 0.9858156028368794, |
|
"eval_PERSON_recall": 0.9788732394366197, |
|
"eval_loss": 0.06350322812795639, |
|
"eval_overall_accuracy": 0.980246913580247, |
|
"eval_overall_f1": 0.8986568986568987, |
|
"eval_overall_precision": 0.8803827751196173, |
|
"eval_overall_recall": 0.9177057356608479, |
|
"eval_runtime": 0.6936, |
|
"eval_samples_per_second": 245.099, |
|
"eval_steps_per_second": 4.325, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.8208107948303223, |
|
"learning_rate": 3e-05, |
|
"loss": 0.054, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LOCATION_f1": 0.89, |
|
"eval_LOCATION_number": 93, |
|
"eval_LOCATION_precision": 0.8317757009345794, |
|
"eval_LOCATION_recall": 0.956989247311828, |
|
"eval_ORGANIZATION_f1": 0.8830409356725145, |
|
"eval_ORGANIZATION_number": 166, |
|
"eval_ORGANIZATION_precision": 0.8579545454545454, |
|
"eval_ORGANIZATION_recall": 0.9096385542168675, |
|
"eval_PERSON_f1": 0.9752650176678446, |
|
"eval_PERSON_number": 142, |
|
"eval_PERSON_precision": 0.9787234042553191, |
|
"eval_PERSON_recall": 0.971830985915493, |
|
"eval_loss": 0.05302194878458977, |
|
"eval_overall_accuracy": 0.9840877914951989, |
|
"eval_overall_f1": 0.9163636363636364, |
|
"eval_overall_precision": 0.8915094339622641, |
|
"eval_overall_recall": 0.942643391521197, |
|
"eval_runtime": 0.8031, |
|
"eval_samples_per_second": 211.67, |
|
"eval_steps_per_second": 3.735, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8113964796066284, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0268, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_LOCATION_f1": 0.8910891089108912, |
|
"eval_LOCATION_number": 93, |
|
"eval_LOCATION_precision": 0.8256880733944955, |
|
"eval_LOCATION_recall": 0.967741935483871, |
|
"eval_ORGANIZATION_f1": 0.8922155688622753, |
|
"eval_ORGANIZATION_number": 166, |
|
"eval_ORGANIZATION_precision": 0.8869047619047619, |
|
"eval_ORGANIZATION_recall": 0.8975903614457831, |
|
"eval_PERSON_f1": 0.9787234042553192, |
|
"eval_PERSON_number": 142, |
|
"eval_PERSON_precision": 0.9857142857142858, |
|
"eval_PERSON_recall": 0.971830985915493, |
|
"eval_loss": 0.06729695945978165, |
|
"eval_overall_accuracy": 0.9832647462277092, |
|
"eval_overall_f1": 0.9217603911980441, |
|
"eval_overall_precision": 0.9040767386091128, |
|
"eval_overall_recall": 0.940149625935162, |
|
"eval_runtime": 0.7494, |
|
"eval_samples_per_second": 226.858, |
|
"eval_steps_per_second": 4.003, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.316173553466797, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0159, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LOCATION_f1": 0.9312169312169313, |
|
"eval_LOCATION_number": 93, |
|
"eval_LOCATION_precision": 0.9166666666666666, |
|
"eval_LOCATION_recall": 0.946236559139785, |
|
"eval_ORGANIZATION_f1": 0.8973607038123167, |
|
"eval_ORGANIZATION_number": 166, |
|
"eval_ORGANIZATION_precision": 0.8742857142857143, |
|
"eval_ORGANIZATION_recall": 0.9216867469879518, |
|
"eval_PERSON_f1": 0.9716312056737589, |
|
"eval_PERSON_number": 142, |
|
"eval_PERSON_precision": 0.9785714285714285, |
|
"eval_PERSON_recall": 0.9647887323943662, |
|
"eval_loss": 0.054620783776044846, |
|
"eval_overall_accuracy": 0.9868312757201646, |
|
"eval_overall_f1": 0.9310344827586207, |
|
"eval_overall_precision": 0.9197080291970803, |
|
"eval_overall_recall": 0.942643391521197, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 277.708, |
|
"eval_steps_per_second": 4.901, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 6.452114105224609, |
|
"learning_rate": 0.0, |
|
"loss": 0.0108, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_LOCATION_f1": 0.9175257731958764, |
|
"eval_LOCATION_number": 93, |
|
"eval_LOCATION_precision": 0.8811881188118812, |
|
"eval_LOCATION_recall": 0.956989247311828, |
|
"eval_ORGANIZATION_f1": 0.8990825688073394, |
|
"eval_ORGANIZATION_number": 166, |
|
"eval_ORGANIZATION_precision": 0.9130434782608695, |
|
"eval_ORGANIZATION_recall": 0.8855421686746988, |
|
"eval_PERSON_f1": 0.9716312056737589, |
|
"eval_PERSON_number": 142, |
|
"eval_PERSON_precision": 0.9785714285714285, |
|
"eval_PERSON_recall": 0.9647887323943662, |
|
"eval_loss": 0.057080645114183426, |
|
"eval_overall_accuracy": 0.9857338820301783, |
|
"eval_overall_f1": 0.9290161892901619, |
|
"eval_overall_precision": 0.927860696517413, |
|
"eval_overall_recall": 0.9301745635910225, |
|
"eval_runtime": 0.6378, |
|
"eval_samples_per_second": 266.541, |
|
"eval_steps_per_second": 4.704, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 480, |
|
"total_flos": 189589573867848.0, |
|
"train_loss": 0.07397198428710301, |
|
"train_runtime": 129.3998, |
|
"train_samples_per_second": 59.158, |
|
"train_steps_per_second": 3.709 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 189589573867848.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|