beit-base-patch16-224 / trainer_state.json
HorcruxNo13's picture
End of training
4af7a86 verified
raw
history blame
19.6 kB
{
"best_metric": 0.9491525423728814,
"best_model_checkpoint": "beit-base-patch16-224/checkpoint-51",
"epoch": 42.35294117647059,
"eval_steps": 500,
"global_step": 180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9411764705882353,
"eval_accuracy": 0.864406779661017,
"eval_f1_score": 0.8151914626490897,
"eval_loss": 0.35985592007637024,
"eval_precision": 0.8831092928112214,
"eval_recall": 0.864406779661017,
"eval_runtime": 0.994,
"eval_samples_per_second": 59.356,
"eval_steps_per_second": 2.012,
"step": 4
},
{
"epoch": 1.8823529411764706,
"eval_accuracy": 0.8983050847457628,
"eval_f1_score": 0.8983050847457628,
"eval_loss": 0.2752338945865631,
"eval_precision": 0.8983050847457628,
"eval_recall": 0.8983050847457628,
"eval_runtime": 1.1891,
"eval_samples_per_second": 49.617,
"eval_steps_per_second": 1.682,
"step": 8
},
{
"epoch": 2.8235294117647056,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9286307743436357,
"eval_loss": 0.17347723245620728,
"eval_precision": 0.9293164462655988,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.0218,
"eval_samples_per_second": 57.739,
"eval_steps_per_second": 1.957,
"step": 12
},
{
"epoch": 3.5294117647058822,
"grad_norm": 5.715649604797363,
"learning_rate": 4.166666666666667e-05,
"loss": 0.2978,
"step": 15
},
{
"epoch": 4.0,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9199970045680336,
"eval_loss": 0.17451411485671997,
"eval_precision": 0.9311215290299315,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.228,
"eval_samples_per_second": 48.047,
"eval_steps_per_second": 1.629,
"step": 17
},
{
"epoch": 4.9411764705882355,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9170563800358625,
"eval_loss": 0.1887725591659546,
"eval_precision": 0.9196471809062606,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0748,
"eval_samples_per_second": 54.895,
"eval_steps_per_second": 1.861,
"step": 21
},
{
"epoch": 5.882352941176471,
"eval_accuracy": 0.8983050847457628,
"eval_f1_score": 0.9023521272915945,
"eval_loss": 0.2818872034549713,
"eval_precision": 0.9092193117616847,
"eval_recall": 0.8983050847457628,
"eval_runtime": 1.2817,
"eval_samples_per_second": 46.032,
"eval_steps_per_second": 1.56,
"step": 25
},
{
"epoch": 6.823529411764706,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.900974731483206,
"eval_loss": 0.5331762433052063,
"eval_precision": 0.9229583975346687,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1367,
"eval_samples_per_second": 51.907,
"eval_steps_per_second": 1.76,
"step": 29
},
{
"epoch": 7.0588235294117645,
"grad_norm": 3.518982410430908,
"learning_rate": 4.62962962962963e-05,
"loss": 0.0283,
"step": 30
},
{
"epoch": 8.0,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9199970045680336,
"eval_loss": 0.5418176054954529,
"eval_precision": 0.9311215290299315,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0994,
"eval_samples_per_second": 53.664,
"eval_steps_per_second": 1.819,
"step": 34
},
{
"epoch": 8.941176470588236,
"eval_accuracy": 0.8983050847457628,
"eval_f1_score": 0.8757595139110971,
"eval_loss": 0.6493940353393555,
"eval_precision": 0.9092009685230025,
"eval_recall": 0.8983050847457628,
"eval_runtime": 1.1076,
"eval_samples_per_second": 53.266,
"eval_steps_per_second": 1.806,
"step": 38
},
{
"epoch": 9.882352941176471,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9222355815847652,
"eval_loss": 0.5614629983901978,
"eval_precision": 0.9455205811138014,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.107,
"eval_samples_per_second": 53.298,
"eval_steps_per_second": 1.807,
"step": 42
},
{
"epoch": 10.588235294117647,
"grad_norm": 0.022936690598726273,
"learning_rate": 4.166666666666667e-05,
"loss": 0.0061,
"step": 45
},
{
"epoch": 10.823529411764707,
"eval_accuracy": 0.8983050847457628,
"eval_f1_score": 0.8857329111566401,
"eval_loss": 0.8766900897026062,
"eval_precision": 0.8910232266164471,
"eval_recall": 0.8983050847457628,
"eval_runtime": 1.0968,
"eval_samples_per_second": 53.791,
"eval_steps_per_second": 1.823,
"step": 46
},
{
"epoch": 12.0,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.9519982027408203,
"eval_loss": 0.3859255313873291,
"eval_precision": 0.961864406779661,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1019,
"eval_samples_per_second": 53.546,
"eval_steps_per_second": 1.815,
"step": 51
},
{
"epoch": 12.941176470588236,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.4550356864929199,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1103,
"eval_samples_per_second": 53.137,
"eval_steps_per_second": 1.801,
"step": 55
},
{
"epoch": 13.882352941176471,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.947908749000523,
"eval_loss": 0.4313892722129822,
"eval_precision": 0.9476985709538053,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1142,
"eval_samples_per_second": 52.955,
"eval_steps_per_second": 1.795,
"step": 59
},
{
"epoch": 14.117647058823529,
"grad_norm": 5.196343898773193,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.01,
"step": 60
},
{
"epoch": 14.823529411764707,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.9519982027408203,
"eval_loss": 0.41266247630119324,
"eval_precision": 0.961864406779661,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1128,
"eval_samples_per_second": 53.019,
"eval_steps_per_second": 1.797,
"step": 63
},
{
"epoch": 16.0,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.947908749000523,
"eval_loss": 0.3284989297389984,
"eval_precision": 0.9476985709538053,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1075,
"eval_samples_per_second": 53.271,
"eval_steps_per_second": 1.806,
"step": 68
},
{
"epoch": 16.941176470588236,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.947908749000523,
"eval_loss": 0.3179616332054138,
"eval_precision": 0.9476985709538053,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.0963,
"eval_samples_per_second": 53.819,
"eval_steps_per_second": 1.824,
"step": 72
},
{
"epoch": 17.647058823529413,
"grad_norm": 5.957318305969238,
"learning_rate": 3.240740740740741e-05,
"loss": 0.0076,
"step": 75
},
{
"epoch": 17.88235294117647,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9286307743436357,
"eval_loss": 0.44822579622268677,
"eval_precision": 0.9293164462655988,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1817,
"eval_samples_per_second": 49.929,
"eval_steps_per_second": 1.693,
"step": 76
},
{
"epoch": 18.823529411764707,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.44370484352111816,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1079,
"eval_samples_per_second": 53.253,
"eval_steps_per_second": 1.805,
"step": 80
},
{
"epoch": 20.0,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.4818989932537079,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.3186,
"eval_samples_per_second": 44.744,
"eval_steps_per_second": 1.517,
"step": 85
},
{
"epoch": 20.941176470588236,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9286307743436357,
"eval_loss": 0.5132895112037659,
"eval_precision": 0.9293164462655988,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1055,
"eval_samples_per_second": 53.367,
"eval_steps_per_second": 1.809,
"step": 89
},
{
"epoch": 21.176470588235293,
"grad_norm": 0.27098149061203003,
"learning_rate": 2.777777777777778e-05,
"loss": 0.0003,
"step": 90
},
{
"epoch": 21.88235294117647,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.947908749000523,
"eval_loss": 0.45395800471305847,
"eval_precision": 0.9476985709538053,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1075,
"eval_samples_per_second": 53.275,
"eval_steps_per_second": 1.806,
"step": 93
},
{
"epoch": 22.823529411764707,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9170563800358625,
"eval_loss": 0.38566043972969055,
"eval_precision": 0.9196471809062606,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0947,
"eval_samples_per_second": 53.897,
"eval_steps_per_second": 1.827,
"step": 97
},
{
"epoch": 24.0,
"eval_accuracy": 0.8983050847457628,
"eval_f1_score": 0.9023521272915945,
"eval_loss": 0.4077180027961731,
"eval_precision": 0.9092193117616847,
"eval_recall": 0.8983050847457628,
"eval_runtime": 1.1092,
"eval_samples_per_second": 53.192,
"eval_steps_per_second": 1.803,
"step": 102
},
{
"epoch": 24.705882352941178,
"grad_norm": 0.018473587930202484,
"learning_rate": 2.314814814814815e-05,
"loss": 0.0028,
"step": 105
},
{
"epoch": 24.941176470588236,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.947908749000523,
"eval_loss": 0.3955690562725067,
"eval_precision": 0.9476985709538053,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.2914,
"eval_samples_per_second": 45.688,
"eval_steps_per_second": 1.549,
"step": 106
},
{
"epoch": 25.88235294117647,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9286307743436357,
"eval_loss": 0.4670986831188202,
"eval_precision": 0.9293164462655988,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1219,
"eval_samples_per_second": 52.592,
"eval_steps_per_second": 1.783,
"step": 110
},
{
"epoch": 26.823529411764707,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.3811493515968323,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.2582,
"eval_samples_per_second": 46.893,
"eval_steps_per_second": 1.59,
"step": 114
},
{
"epoch": 28.0,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.3700270354747772,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1041,
"eval_samples_per_second": 53.436,
"eval_steps_per_second": 1.811,
"step": 119
},
{
"epoch": 28.235294117647058,
"grad_norm": 0.08375111222267151,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.0006,
"step": 120
},
{
"epoch": 28.941176470588236,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.40281012654304504,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1715,
"eval_samples_per_second": 50.362,
"eval_steps_per_second": 1.707,
"step": 123
},
{
"epoch": 29.88235294117647,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9080138226098403,
"eval_loss": 0.6924118995666504,
"eval_precision": 0.9106172049888072,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1072,
"eval_samples_per_second": 53.287,
"eval_steps_per_second": 1.806,
"step": 127
},
{
"epoch": 30.823529411764707,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9080138226098403,
"eval_loss": 0.6948609948158264,
"eval_precision": 0.9106172049888072,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1092,
"eval_samples_per_second": 53.191,
"eval_steps_per_second": 1.803,
"step": 131
},
{
"epoch": 31.764705882352942,
"grad_norm": 0.0031740041449666023,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0033,
"step": 135
},
{
"epoch": 32.0,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.5888532996177673,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1154,
"eval_samples_per_second": 52.896,
"eval_steps_per_second": 1.793,
"step": 136
},
{
"epoch": 32.94117647058823,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.5128433108329773,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.0996,
"eval_samples_per_second": 53.657,
"eval_steps_per_second": 1.819,
"step": 140
},
{
"epoch": 33.88235294117647,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.9502338280215176,
"eval_loss": 0.44105064868927,
"eval_precision": 0.9521964718090626,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.3012,
"eval_samples_per_second": 45.342,
"eval_steps_per_second": 1.537,
"step": 144
},
{
"epoch": 34.8235294117647,
"eval_accuracy": 0.9491525423728814,
"eval_f1_score": 0.9502338280215176,
"eval_loss": 0.4420201778411865,
"eval_precision": 0.9521964718090626,
"eval_recall": 0.9491525423728814,
"eval_runtime": 1.1093,
"eval_samples_per_second": 53.188,
"eval_steps_per_second": 1.803,
"step": 148
},
{
"epoch": 35.294117647058826,
"grad_norm": 0.0013447869569063187,
"learning_rate": 9.259259259259259e-06,
"loss": 0.0013,
"step": 150
},
{
"epoch": 36.0,
"eval_accuracy": 0.9322033898305084,
"eval_f1_score": 0.9322033898305084,
"eval_loss": 0.5615989565849304,
"eval_precision": 0.9322033898305084,
"eval_recall": 0.9322033898305084,
"eval_runtime": 1.1347,
"eval_samples_per_second": 51.997,
"eval_steps_per_second": 1.763,
"step": 153
},
{
"epoch": 36.94117647058823,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6365456581115723,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0934,
"eval_samples_per_second": 53.961,
"eval_steps_per_second": 1.829,
"step": 157
},
{
"epoch": 37.88235294117647,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6694910526275635,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0997,
"eval_samples_per_second": 53.65,
"eval_steps_per_second": 1.819,
"step": 161
},
{
"epoch": 38.8235294117647,
"grad_norm": 0.0024713820312172174,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.0001,
"step": 165
},
{
"epoch": 38.8235294117647,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6845612525939941,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1919,
"eval_samples_per_second": 49.501,
"eval_steps_per_second": 1.678,
"step": 165
},
{
"epoch": 40.0,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6930243968963623,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1022,
"eval_samples_per_second": 53.53,
"eval_steps_per_second": 1.815,
"step": 170
},
{
"epoch": 40.94117647058823,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6957547068595886,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.1025,
"eval_samples_per_second": 53.515,
"eval_steps_per_second": 1.814,
"step": 174
},
{
"epoch": 41.88235294117647,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6966932415962219,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.0997,
"eval_samples_per_second": 53.649,
"eval_steps_per_second": 1.819,
"step": 178
},
{
"epoch": 42.35294117647059,
"grad_norm": 0.0012529775267466903,
"learning_rate": 0.0,
"loss": 0.0044,
"step": 180
},
{
"epoch": 42.35294117647059,
"eval_accuracy": 0.9152542372881356,
"eval_f1_score": 0.9131812483342053,
"eval_loss": 0.6952070593833923,
"eval_precision": 0.912013958125623,
"eval_recall": 0.9152542372881356,
"eval_runtime": 1.142,
"eval_samples_per_second": 51.664,
"eval_steps_per_second": 1.751,
"step": 180
},
{
"epoch": 42.35294117647059,
"step": 180,
"total_flos": 1.7260934287224177e+18,
"train_loss": 0.030212831471969064,
"train_runtime": 1290.1323,
"train_samples_per_second": 18.347,
"train_steps_per_second": 0.14
},
{
"epoch": 42.35294117647059,
"eval_accuracy": 0.9387755102040817,
"eval_f1_score": 0.9412065766745571,
"eval_loss": 0.3751787841320038,
"eval_precision": 0.9451036228444866,
"eval_recall": 0.9387755102040817,
"eval_runtime": 3.0643,
"eval_samples_per_second": 47.972,
"eval_steps_per_second": 1.632,
"step": 180
}
],
"logging_steps": 15,
"max_steps": 180,
"num_input_tokens_seen": 0,
"num_train_epochs": 45,
"save_steps": 500,
"total_flos": 1.7260934287224177e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}