|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.483087062835693, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5384, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7368421052631579, |
|
"eval_f1": 0.6617470147024391, |
|
"eval_loss": 0.49190446734428406, |
|
"eval_precision": 0.676996860830136, |
|
"eval_recall": 0.6538006910347336, |
|
"eval_runtime": 1.8289, |
|
"eval_samples_per_second": 218.165, |
|
"eval_steps_per_second": 27.339, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.5009255409240723, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4212, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.804783605457392, |
|
"eval_loss": 0.41747915744781494, |
|
"eval_precision": 0.7930195663670017, |
|
"eval_recall": 0.8358792507728678, |
|
"eval_runtime": 1.8239, |
|
"eval_samples_per_second": 218.767, |
|
"eval_steps_per_second": 27.414, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.475321054458618, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3413, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8008725610148754, |
|
"eval_loss": 0.34032315015792847, |
|
"eval_precision": 0.825657894736842, |
|
"eval_recall": 0.7850518276050191, |
|
"eval_runtime": 1.8225, |
|
"eval_samples_per_second": 218.935, |
|
"eval_steps_per_second": 27.436, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.4994115829467773, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2888, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8151164115613962, |
|
"eval_loss": 0.3277858793735504, |
|
"eval_precision": 0.8159193371512123, |
|
"eval_recall": 0.8143298781596654, |
|
"eval_runtime": 1.824, |
|
"eval_samples_per_second": 218.745, |
|
"eval_steps_per_second": 27.412, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.4922258853912354, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2577, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8289650949173301, |
|
"eval_loss": 0.3102515935897827, |
|
"eval_precision": 0.8325081997648369, |
|
"eval_recall": 0.82569558101473, |
|
"eval_runtime": 1.824, |
|
"eval_samples_per_second": 218.754, |
|
"eval_steps_per_second": 27.413, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.708418846130371, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2495, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8368501045387564, |
|
"eval_loss": 0.30735519528388977, |
|
"eval_precision": 0.8435514312676942, |
|
"eval_recall": 0.8310147299509002, |
|
"eval_runtime": 1.8256, |
|
"eval_samples_per_second": 218.558, |
|
"eval_steps_per_second": 27.388, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.44828805327415466, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2391, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8393634395533442, |
|
"eval_loss": 0.3005428612232208, |
|
"eval_precision": 0.8402278542707444, |
|
"eval_recall": 0.8385160938352427, |
|
"eval_runtime": 1.8233, |
|
"eval_samples_per_second": 218.828, |
|
"eval_steps_per_second": 27.422, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.012701988220215, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2177, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.29794931411743164, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 1.822, |
|
"eval_samples_per_second": 218.994, |
|
"eval_steps_per_second": 27.443, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.480318069458008, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2102, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8548827059465357, |
|
"eval_loss": 0.2961363196372986, |
|
"eval_precision": 0.8548827059465357, |
|
"eval_recall": 0.8548827059465357, |
|
"eval_runtime": 1.8216, |
|
"eval_samples_per_second": 219.038, |
|
"eval_steps_per_second": 27.448, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.2318220138549805, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2029, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8339841249519908, |
|
"eval_loss": 0.3042662441730499, |
|
"eval_precision": 0.8579231241892538, |
|
"eval_recall": 0.8177850518276051, |
|
"eval_runtime": 1.8231, |
|
"eval_samples_per_second": 218.855, |
|
"eval_steps_per_second": 27.425, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.4476318061351776, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1829, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8533986527862829, |
|
"eval_loss": 0.30592578649520874, |
|
"eval_precision": 0.8572003218020917, |
|
"eval_recall": 0.8498817966903074, |
|
"eval_runtime": 1.8223, |
|
"eval_samples_per_second": 218.951, |
|
"eval_steps_per_second": 27.437, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.3299665451049805, |
|
"learning_rate": 2e-05, |
|
"loss": 0.184, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.846679266293906, |
|
"eval_loss": 0.3001992702484131, |
|
"eval_precision": 0.8609191655801824, |
|
"eval_recall": 0.8356064739043463, |
|
"eval_runtime": 1.821, |
|
"eval_samples_per_second": 219.105, |
|
"eval_steps_per_second": 27.457, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.40616801381111145, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1802, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8649122807017544, |
|
"eval_loss": 0.29539263248443604, |
|
"eval_precision": 0.8710116366366366, |
|
"eval_recall": 0.8594744498999818, |
|
"eval_runtime": 1.8246, |
|
"eval_samples_per_second": 218.68, |
|
"eval_steps_per_second": 27.404, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.2211737632751465, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1684, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8642908431276217, |
|
"eval_loss": 0.3007645606994629, |
|
"eval_precision": 0.8633964654080464, |
|
"eval_recall": 0.8652027641389344, |
|
"eval_runtime": 1.8212, |
|
"eval_samples_per_second": 219.082, |
|
"eval_steps_per_second": 27.454, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.8058817386627197, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1627, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.3066510558128357, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.8221, |
|
"eval_samples_per_second": 218.981, |
|
"eval_steps_per_second": 27.441, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.3792524337768555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1581, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8522278069611882, |
|
"eval_loss": 0.31070253252983093, |
|
"eval_precision": 0.8513631702756499, |
|
"eval_recall": 0.8531096563011457, |
|
"eval_runtime": 1.8214, |
|
"eval_samples_per_second": 219.068, |
|
"eval_steps_per_second": 27.452, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.5356613397598267, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1468, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8483536940081443, |
|
"eval_loss": 0.3229115903377533, |
|
"eval_precision": 0.8575792287132493, |
|
"eval_recall": 0.8406073831605747, |
|
"eval_runtime": 1.8217, |
|
"eval_samples_per_second": 219.021, |
|
"eval_steps_per_second": 27.446, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.944960832595825, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1433, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8456742372671576, |
|
"eval_loss": 0.32474786043167114, |
|
"eval_precision": 0.8536697247706422, |
|
"eval_recall": 0.8388343335151845, |
|
"eval_runtime": 1.8221, |
|
"eval_samples_per_second": 218.981, |
|
"eval_steps_per_second": 27.441, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.548107147216797, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1538, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8456742372671576, |
|
"eval_loss": 0.324598103761673, |
|
"eval_precision": 0.8536697247706422, |
|
"eval_recall": 0.8388343335151845, |
|
"eval_runtime": 1.8228, |
|
"eval_samples_per_second": 218.896, |
|
"eval_steps_per_second": 27.431, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.463918924331665, |
|
"learning_rate": 0.0, |
|
"loss": 0.1412, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8456742372671576, |
|
"eval_loss": 0.32347819209098816, |
|
"eval_precision": 0.8536697247706422, |
|
"eval_recall": 0.8388343335151845, |
|
"eval_runtime": 1.8216, |
|
"eval_samples_per_second": 219.041, |
|
"eval_steps_per_second": 27.449, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2294086073265701, |
|
"train_runtime": 651.9313, |
|
"train_samples_per_second": 111.607, |
|
"train_steps_per_second": 3.743 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|