{
  "best_metric": 0.90932,
  "best_model_checkpoint": "../../checkpoint/imdb/electra-base/checkpoint-14076",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 15640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.89252,
      "eval_loss": 0.2527729868888855,
      "eval_runtime": 33.8056,
      "eval_samples_per_second": 739.522,
      "eval_steps_per_second": 2.899,
      "step": 782
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.680306905370844e-05,
      "loss": 0.272,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.90448,
      "eval_loss": 0.2465250939130783,
      "eval_runtime": 33.8629,
      "eval_samples_per_second": 738.272,
      "eval_steps_per_second": 2.894,
      "step": 1564
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.360613810741688e-05,
      "loss": 0.1451,
      "step": 2000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9034,
      "eval_loss": 0.39296430349349976,
      "eval_runtime": 33.8123,
      "eval_samples_per_second": 739.377,
      "eval_steps_per_second": 2.898,
      "step": 2346
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.040920716112532e-05,
      "loss": 0.0804,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.898,
      "eval_loss": 0.45841386914253235,
      "eval_runtime": 33.8469,
      "eval_samples_per_second": 738.621,
      "eval_steps_per_second": 2.895,
      "step": 3128
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9056,
      "eval_loss": 0.5110316872596741,
      "eval_runtime": 33.885,
      "eval_samples_per_second": 737.789,
      "eval_steps_per_second": 2.892,
      "step": 3910
    },
    {
      "epoch": 5.12,
      "learning_rate": 3.721227621483376e-05,
      "loss": 0.0499,
      "step": 4000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.89784,
      "eval_loss": 0.5765578746795654,
      "eval_runtime": 33.9081,
      "eval_samples_per_second": 737.288,
      "eval_steps_per_second": 2.89,
      "step": 4692
    },
    {
      "epoch": 6.39,
      "learning_rate": 3.40153452685422e-05,
      "loss": 0.0334,
      "step": 5000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.89992,
      "eval_loss": 0.5621291995048523,
      "eval_runtime": 34.0076,
      "eval_samples_per_second": 735.13,
      "eval_steps_per_second": 2.882,
      "step": 5474
    },
    {
      "epoch": 7.67,
      "learning_rate": 3.081841432225064e-05,
      "loss": 0.0243,
      "step": 6000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9016,
      "eval_loss": 0.6355283856391907,
      "eval_runtime": 33.9931,
      "eval_samples_per_second": 735.443,
      "eval_steps_per_second": 2.883,
      "step": 6256
    },
    {
      "epoch": 8.95,
      "learning_rate": 2.7621483375959077e-05,
      "loss": 0.0195,
      "step": 7000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.90184,
      "eval_loss": 0.6819501519203186,
      "eval_runtime": 33.7784,
      "eval_samples_per_second": 740.119,
      "eval_steps_per_second": 2.901,
      "step": 7038
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.90152,
      "eval_loss": 0.6723979115486145,
      "eval_runtime": 33.9379,
      "eval_samples_per_second": 736.639,
      "eval_steps_per_second": 2.888,
      "step": 7820
    },
    {
      "epoch": 10.23,
      "learning_rate": 2.442455242966752e-05,
      "loss": 0.0132,
      "step": 8000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.90308,
      "eval_loss": 0.7561084628105164,
      "eval_runtime": 33.739,
      "eval_samples_per_second": 740.982,
      "eval_steps_per_second": 2.905,
      "step": 8602
    },
    {
      "epoch": 11.51,
      "learning_rate": 2.122762148337596e-05,
      "loss": 0.0095,
      "step": 9000
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.90576,
      "eval_loss": 0.7514671087265015,
      "eval_runtime": 33.7517,
      "eval_samples_per_second": 740.704,
      "eval_steps_per_second": 2.904,
      "step": 9384
    },
    {
      "epoch": 12.79,
      "learning_rate": 1.80306905370844e-05,
      "loss": 0.0072,
      "step": 10000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.90604,
      "eval_loss": 0.7862638831138611,
      "eval_runtime": 33.8938,
      "eval_samples_per_second": 737.597,
      "eval_steps_per_second": 2.891,
      "step": 10166
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.90584,
      "eval_loss": 0.7231158018112183,
      "eval_runtime": 33.9086,
      "eval_samples_per_second": 737.276,
      "eval_steps_per_second": 2.89,
      "step": 10948
    },
    {
      "epoch": 14.07,
      "learning_rate": 1.483375959079284e-05,
      "loss": 0.0067,
      "step": 11000
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.90152,
      "eval_loss": 0.8694790005683899,
      "eval_runtime": 33.8199,
      "eval_samples_per_second": 739.211,
      "eval_steps_per_second": 2.898,
      "step": 11730
    },
    {
      "epoch": 15.35,
      "learning_rate": 1.163682864450128e-05,
      "loss": 0.0037,
      "step": 12000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.90492,
      "eval_loss": 0.8443425297737122,
      "eval_runtime": 33.8917,
      "eval_samples_per_second": 737.644,
      "eval_steps_per_second": 2.892,
      "step": 12512
    },
    {
      "epoch": 16.62,
      "learning_rate": 8.439897698209718e-06,
      "loss": 0.0033,
      "step": 13000
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.90808,
      "eval_loss": 0.8232758641242981,
      "eval_runtime": 33.8581,
      "eval_samples_per_second": 738.376,
      "eval_steps_per_second": 2.894,
      "step": 13294
    },
    {
      "epoch": 17.9,
      "learning_rate": 5.242966751918159e-06,
      "loss": 0.0019,
      "step": 14000
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.90932,
      "eval_loss": 0.8650785088539124,
      "eval_runtime": 33.9491,
      "eval_samples_per_second": 736.398,
      "eval_steps_per_second": 2.887,
      "step": 14076
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.90856,
      "eval_loss": 0.8838527202606201,
      "eval_runtime": 33.9568,
      "eval_samples_per_second": 736.23,
      "eval_steps_per_second": 2.886,
      "step": 14858
    },
    {
      "epoch": 19.18,
      "learning_rate": 2.0460358056265987e-06,
      "loss": 0.0003,
      "step": 15000
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.908,
      "eval_loss": 0.8944087624549866,
      "eval_runtime": 33.9267,
      "eval_samples_per_second": 736.882,
      "eval_steps_per_second": 2.889,
      "step": 15640
    },
    {
      "epoch": 20.0,
      "step": 15640,
      "total_flos": 3.288888192e+16,
      "train_loss": 0.0428766398688259,
      "train_runtime": 2610.1004,
      "train_samples_per_second": 191.564,
      "train_steps_per_second": 5.992
    }
  ],
  "logging_steps": 1000,
  "max_steps": 15640,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 3.288888192e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}