nerui-base-2 / trainer_state.json
apwic's picture
End of training
9bdd535 verified
raw
history blame
6.17 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 480,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.197399616241455,
"learning_rate": 4e-05,
"loss": 0.2624,
"step": 96
},
{
"epoch": 1.0,
"eval_LOCATION_f1": 0.8585365853658538,
"eval_LOCATION_number": 93,
"eval_LOCATION_precision": 0.7857142857142857,
"eval_LOCATION_recall": 0.946236559139785,
"eval_ORGANIZATION_f1": 0.851963746223565,
"eval_ORGANIZATION_number": 166,
"eval_ORGANIZATION_precision": 0.8545454545454545,
"eval_ORGANIZATION_recall": 0.8493975903614458,
"eval_PERSON_f1": 0.9823321554770318,
"eval_PERSON_number": 142,
"eval_PERSON_precision": 0.9858156028368794,
"eval_PERSON_recall": 0.9788732394366197,
"eval_loss": 0.06350322812795639,
"eval_overall_accuracy": 0.980246913580247,
"eval_overall_f1": 0.8986568986568987,
"eval_overall_precision": 0.8803827751196173,
"eval_overall_recall": 0.9177057356608479,
"eval_runtime": 0.6936,
"eval_samples_per_second": 245.099,
"eval_steps_per_second": 4.325,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 2.8208107948303223,
"learning_rate": 3e-05,
"loss": 0.054,
"step": 192
},
{
"epoch": 2.0,
"eval_LOCATION_f1": 0.89,
"eval_LOCATION_number": 93,
"eval_LOCATION_precision": 0.8317757009345794,
"eval_LOCATION_recall": 0.956989247311828,
"eval_ORGANIZATION_f1": 0.8830409356725145,
"eval_ORGANIZATION_number": 166,
"eval_ORGANIZATION_precision": 0.8579545454545454,
"eval_ORGANIZATION_recall": 0.9096385542168675,
"eval_PERSON_f1": 0.9752650176678446,
"eval_PERSON_number": 142,
"eval_PERSON_precision": 0.9787234042553191,
"eval_PERSON_recall": 0.971830985915493,
"eval_loss": 0.05302194878458977,
"eval_overall_accuracy": 0.9840877914951989,
"eval_overall_f1": 0.9163636363636364,
"eval_overall_precision": 0.8915094339622641,
"eval_overall_recall": 0.942643391521197,
"eval_runtime": 0.8031,
"eval_samples_per_second": 211.67,
"eval_steps_per_second": 3.735,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 1.8113964796066284,
"learning_rate": 2e-05,
"loss": 0.0268,
"step": 288
},
{
"epoch": 3.0,
"eval_LOCATION_f1": 0.8910891089108912,
"eval_LOCATION_number": 93,
"eval_LOCATION_precision": 0.8256880733944955,
"eval_LOCATION_recall": 0.967741935483871,
"eval_ORGANIZATION_f1": 0.8922155688622753,
"eval_ORGANIZATION_number": 166,
"eval_ORGANIZATION_precision": 0.8869047619047619,
"eval_ORGANIZATION_recall": 0.8975903614457831,
"eval_PERSON_f1": 0.9787234042553192,
"eval_PERSON_number": 142,
"eval_PERSON_precision": 0.9857142857142858,
"eval_PERSON_recall": 0.971830985915493,
"eval_loss": 0.06729695945978165,
"eval_overall_accuracy": 0.9832647462277092,
"eval_overall_f1": 0.9217603911980441,
"eval_overall_precision": 0.9040767386091128,
"eval_overall_recall": 0.940149625935162,
"eval_runtime": 0.7494,
"eval_samples_per_second": 226.858,
"eval_steps_per_second": 4.003,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 5.316173553466797,
"learning_rate": 1e-05,
"loss": 0.0159,
"step": 384
},
{
"epoch": 4.0,
"eval_LOCATION_f1": 0.9312169312169313,
"eval_LOCATION_number": 93,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.946236559139785,
"eval_ORGANIZATION_f1": 0.8973607038123167,
"eval_ORGANIZATION_number": 166,
"eval_ORGANIZATION_precision": 0.8742857142857143,
"eval_ORGANIZATION_recall": 0.9216867469879518,
"eval_PERSON_f1": 0.9716312056737589,
"eval_PERSON_number": 142,
"eval_PERSON_precision": 0.9785714285714285,
"eval_PERSON_recall": 0.9647887323943662,
"eval_loss": 0.054620783776044846,
"eval_overall_accuracy": 0.9868312757201646,
"eval_overall_f1": 0.9310344827586207,
"eval_overall_precision": 0.9197080291970803,
"eval_overall_recall": 0.942643391521197,
"eval_runtime": 0.6122,
"eval_samples_per_second": 277.708,
"eval_steps_per_second": 4.901,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 6.452114105224609,
"learning_rate": 0.0,
"loss": 0.0108,
"step": 480
},
{
"epoch": 5.0,
"eval_LOCATION_f1": 0.9175257731958764,
"eval_LOCATION_number": 93,
"eval_LOCATION_precision": 0.8811881188118812,
"eval_LOCATION_recall": 0.956989247311828,
"eval_ORGANIZATION_f1": 0.8990825688073394,
"eval_ORGANIZATION_number": 166,
"eval_ORGANIZATION_precision": 0.9130434782608695,
"eval_ORGANIZATION_recall": 0.8855421686746988,
"eval_PERSON_f1": 0.9716312056737589,
"eval_PERSON_number": 142,
"eval_PERSON_precision": 0.9785714285714285,
"eval_PERSON_recall": 0.9647887323943662,
"eval_loss": 0.057080645114183426,
"eval_overall_accuracy": 0.9857338820301783,
"eval_overall_f1": 0.9290161892901619,
"eval_overall_precision": 0.927860696517413,
"eval_overall_recall": 0.9301745635910225,
"eval_runtime": 0.6378,
"eval_samples_per_second": 266.541,
"eval_steps_per_second": 4.704,
"step": 480
},
{
"epoch": 5.0,
"step": 480,
"total_flos": 189589573867848.0,
"train_loss": 0.07397198428710301,
"train_runtime": 129.3998,
"train_samples_per_second": 59.158,
"train_steps_per_second": 3.709
}
],
"logging_steps": 500,
"max_steps": 480,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 189589573867848.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}