{
  "best_metric": 0.6530612111091614,
  "best_model_checkpoint": "bert_uncased_L-4_H-128_A-2_mnli/checkpoint-12272",
  "epoch": 13.0,
  "eval_steps": 500,
  "global_step": 19942,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.6993112564086914,
      "learning_rate": 4.9e-05,
      "loss": 0.8752,
      "step": 1534
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6558329088130412,
      "eval_loss": 0.7743282318115234,
      "eval_runtime": 3.4721,
      "eval_samples_per_second": 2826.817,
      "eval_steps_per_second": 11.232,
      "step": 1534
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.562037944793701,
      "learning_rate": 4.8e-05,
      "loss": 0.7714,
      "step": 3068
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6856851757514009,
      "eval_loss": 0.7263490557670593,
      "eval_runtime": 3.284,
      "eval_samples_per_second": 2988.699,
      "eval_steps_per_second": 11.876,
      "step": 3068
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.291146993637085,
      "learning_rate": 4.7e-05,
      "loss": 0.7255,
      "step": 4602
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7019867549668874,
      "eval_loss": 0.6945955157279968,
      "eval_runtime": 3.276,
      "eval_samples_per_second": 2996.017,
      "eval_steps_per_second": 11.905,
      "step": 4602
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.2317264080047607,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.6927,
      "step": 6136
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7087111563932756,
      "eval_loss": 0.6788806915283203,
      "eval_runtime": 3.3205,
      "eval_samples_per_second": 2955.889,
      "eval_steps_per_second": 11.745,
      "step": 6136
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.6884801387786865,
      "learning_rate": 4.5e-05,
      "loss": 0.6662,
      "step": 7670
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7205298013245033,
      "eval_loss": 0.6656607985496521,
      "eval_runtime": 3.3278,
      "eval_samples_per_second": 2949.42,
      "eval_steps_per_second": 11.72,
      "step": 7670
    },
    {
      "epoch": 6.0,
      "grad_norm": 2.7260897159576416,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.6441,
      "step": 9204
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7228731533367295,
      "eval_loss": 0.6691338419914246,
      "eval_runtime": 3.2556,
      "eval_samples_per_second": 3014.76,
      "eval_steps_per_second": 11.979,
      "step": 9204
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.3097150325775146,
      "learning_rate": 4.3e-05,
      "loss": 0.625,
      "step": 10738
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7258278145695364,
      "eval_loss": 0.6622374057769775,
      "eval_runtime": 3.2835,
      "eval_samples_per_second": 2989.191,
      "eval_steps_per_second": 11.878,
      "step": 10738
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.3115994930267334,
      "learning_rate": 4.2e-05,
      "loss": 0.607,
      "step": 12272
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7314314824248599,
      "eval_loss": 0.6530612111091614,
      "eval_runtime": 3.1955,
      "eval_samples_per_second": 3071.492,
      "eval_steps_per_second": 12.205,
      "step": 12272
    },
    {
      "epoch": 9.0,
      "grad_norm": 2.484405994415283,
      "learning_rate": 4.1e-05,
      "loss": 0.5894,
      "step": 13806
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7308201732042792,
      "eval_loss": 0.6612954139709473,
      "eval_runtime": 3.2486,
      "eval_samples_per_second": 3021.261,
      "eval_steps_per_second": 12.005,
      "step": 13806
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.5099828243255615,
      "learning_rate": 4e-05,
      "loss": 0.5754,
      "step": 15340
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7292919001528273,
      "eval_loss": 0.6590717434883118,
      "eval_runtime": 3.2884,
      "eval_samples_per_second": 2984.725,
      "eval_steps_per_second": 11.86,
      "step": 15340
    },
    {
      "epoch": 11.0,
      "grad_norm": 2.7104341983795166,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.5615,
      "step": 16874
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7286805909322466,
      "eval_loss": 0.6634514927864075,
      "eval_runtime": 3.2403,
      "eval_samples_per_second": 3029.047,
      "eval_steps_per_second": 12.036,
      "step": 16874
    },
    {
      "epoch": 12.0,
      "grad_norm": 2.729738473892212,
      "learning_rate": 3.8e-05,
      "loss": 0.5477,
      "step": 18408
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7343861436576669,
      "eval_loss": 0.6701191663742065,
      "eval_runtime": 3.2255,
      "eval_samples_per_second": 3042.947,
      "eval_steps_per_second": 12.091,
      "step": 18408
    },
    {
      "epoch": 13.0,
      "grad_norm": 3.311851978302002,
      "learning_rate": 3.7e-05,
      "loss": 0.5343,
      "step": 19942
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7342842587875701,
      "eval_loss": 0.6698570847511292,
      "eval_runtime": 3.2844,
      "eval_samples_per_second": 2988.397,
      "eval_steps_per_second": 11.874,
      "step": 19942
    },
    {
      "epoch": 13.0,
      "step": 19942,
      "total_flos": 6353499042229248.0,
      "train_loss": 0.6473511495103378,
      "train_runtime": 2152.219,
      "train_samples_per_second": 9123.188,
      "train_steps_per_second": 35.638
    }
  ],
  "logging_steps": 1,
  "max_steps": 76700,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6353499042229248.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}