djbp's picture
End of training
611a9b2 verified
{
"best_metric": 0.8720870015535992,
"best_model_checkpoint": "swin-base-patch4-window7-224-in22k-MM_Classification_base_V10/checkpoint-61",
"epoch": 6.885245901639344,
"eval_steps": 500,
"global_step": 105,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6557377049180327,
"grad_norm": 3.5814733505249023,
"learning_rate": 4.545454545454546e-05,
"loss": 0.8498,
"step": 10
},
{
"epoch": 0.9836065573770492,
"step": 15,
"train_accuracy": 0.8171125759338245,
"train_auc": 0.8967571163903928,
"train_loss": 0.47165462374687195,
"train_runtime": 694.4657,
"train_samples_per_second": 11.141,
"train_steps_per_second": 0.088
},
{
"epoch": 0.9836065573770492,
"eval_accuracy": 0.8327291558777835,
"eval_auc": 0.9183935491091096,
"eval_loss": 0.4327956438064575,
"eval_runtime": 205.3728,
"eval_samples_per_second": 9.402,
"eval_steps_per_second": 0.078,
"step": 15
},
{
"epoch": 1.3114754098360657,
"grad_norm": 2.3799431324005127,
"learning_rate": 4.5212765957446815e-05,
"loss": 0.5123,
"step": 20
},
{
"epoch": 1.9672131147540983,
"grad_norm": 2.067809581756592,
"learning_rate": 3.9893617021276594e-05,
"loss": 0.4085,
"step": 30
},
{
"epoch": 1.9672131147540983,
"step": 30,
"train_accuracy": 0.8517513248028952,
"train_auc": 0.9387841345112529,
"train_loss": 0.36320993304252625,
"train_runtime": 684.0745,
"train_samples_per_second": 11.31,
"train_steps_per_second": 0.089
},
{
"epoch": 1.9672131147540983,
"eval_accuracy": 0.8518902123252201,
"eval_auc": 0.9423845489697825,
"eval_loss": 0.37112149596214294,
"eval_runtime": 205.1458,
"eval_samples_per_second": 9.413,
"eval_steps_per_second": 0.078,
"step": 30
},
{
"epoch": 2.6229508196721314,
"grad_norm": 2.11800479888916,
"learning_rate": 3.4574468085106386e-05,
"loss": 0.3717,
"step": 40
},
{
"epoch": 2.9508196721311473,
"step": 45,
"train_accuracy": 0.864029985782603,
"train_auc": 0.9527023779434378,
"train_loss": 0.3227292597293854,
"train_runtime": 681.4027,
"train_samples_per_second": 11.355,
"train_steps_per_second": 0.09
},
{
"epoch": 2.9508196721311473,
"eval_accuracy": 0.8653547384774728,
"eval_auc": 0.9487342167219569,
"eval_loss": 0.3345213234424591,
"eval_runtime": 203.71,
"eval_samples_per_second": 9.479,
"eval_steps_per_second": 0.079,
"step": 45
},
{
"epoch": 3.278688524590164,
"grad_norm": 4.481107711791992,
"learning_rate": 2.925531914893617e-05,
"loss": 0.3301,
"step": 50
},
{
"epoch": 3.9344262295081966,
"grad_norm": 2.703953981399536,
"learning_rate": 2.393617021276596e-05,
"loss": 0.3196,
"step": 60
},
{
"epoch": 4.0,
"step": 61,
"train_accuracy": 0.8733359183145922,
"train_auc": 0.9563002964315263,
"train_loss": 0.30748647451400757,
"train_runtime": 678.9663,
"train_samples_per_second": 11.395,
"train_steps_per_second": 0.09
},
{
"epoch": 4.0,
"eval_accuracy": 0.8720870015535992,
"eval_auc": 0.9534343762433686,
"eval_loss": 0.3244781792163849,
"eval_runtime": 204.3133,
"eval_samples_per_second": 9.451,
"eval_steps_per_second": 0.078,
"step": 61
},
{
"epoch": 4.590163934426229,
"grad_norm": 1.967414379119873,
"learning_rate": 1.8617021276595745e-05,
"loss": 0.3279,
"step": 70
},
{
"epoch": 4.983606557377049,
"step": 76,
"train_accuracy": 0.8813493602171384,
"train_auc": 0.9603495718478623,
"train_loss": 0.2974039912223816,
"train_runtime": 680.7385,
"train_samples_per_second": 11.366,
"train_steps_per_second": 0.09
},
{
"epoch": 4.983606557377049,
"eval_accuracy": 0.8694976696012429,
"eval_auc": 0.953884522107936,
"eval_loss": 0.3240814805030823,
"eval_runtime": 203.7726,
"eval_samples_per_second": 9.476,
"eval_steps_per_second": 0.079,
"step": 76
},
{
"epoch": 5.245901639344262,
"grad_norm": 1.8759346008300781,
"learning_rate": 1.3297872340425532e-05,
"loss": 0.3042,
"step": 80
},
{
"epoch": 5.901639344262295,
"grad_norm": 2.394904136657715,
"learning_rate": 7.97872340425532e-06,
"loss": 0.2926,
"step": 90
},
{
"epoch": 5.967213114754099,
"step": 91,
"train_accuracy": 0.8858730774201887,
"train_auc": 0.9646418504220758,
"train_loss": 0.28083333373069763,
"train_runtime": 681.28,
"train_samples_per_second": 11.357,
"train_steps_per_second": 0.09
},
{
"epoch": 5.967213114754099,
"eval_accuracy": 0.8674262040393579,
"eval_auc": 0.9548863198183253,
"eval_loss": 0.3211652338504791,
"eval_runtime": 205.0822,
"eval_samples_per_second": 9.416,
"eval_steps_per_second": 0.078,
"step": 91
},
{
"epoch": 6.557377049180328,
"grad_norm": 1.7362953424453735,
"learning_rate": 2.6595744680851065e-06,
"loss": 0.2821,
"step": 100
},
{
"epoch": 6.885245901639344,
"step": 105,
"train_accuracy": 0.8937572702597906,
"train_auc": 0.9682688664846578,
"train_loss": 0.26618629693984985,
"train_runtime": 676.8276,
"train_samples_per_second": 11.431,
"train_steps_per_second": 0.09
},
{
"epoch": 6.885245901639344,
"eval_accuracy": 0.8705334023821854,
"eval_auc": 0.9549996078144867,
"eval_loss": 0.31725937128067017,
"eval_runtime": 203.7289,
"eval_samples_per_second": 9.478,
"eval_steps_per_second": 0.079,
"step": 105
},
{
"epoch": 6.885245901639344,
"step": 105,
"total_flos": 4.1785312376666235e+18,
"train_loss": 0.3947783969697498,
"train_runtime": 11418.3292,
"train_samples_per_second": 4.743,
"train_steps_per_second": 0.009
}
],
"logging_steps": 10,
"max_steps": 105,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.1785312376666235e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}