|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.053804397583008, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.4355, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.835906358747232, |
|
"eval_loss": 0.3243214786052704, |
|
"eval_precision": 0.853844109243139, |
|
"eval_recall": 0.8227859610838335, |
|
"eval_runtime": 4.9832, |
|
"eval_samples_per_second": 80.069, |
|
"eval_steps_per_second": 10.034, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 23.150257110595703, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2295, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8701248742380304, |
|
"eval_loss": 0.3046626150608063, |
|
"eval_precision": 0.8624507874015748, |
|
"eval_recall": 0.8794780869248955, |
|
"eval_runtime": 4.975, |
|
"eval_samples_per_second": 80.2, |
|
"eval_steps_per_second": 10.05, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.44390636682510376, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1337, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.879667048676036, |
|
"eval_loss": 0.3747338354587555, |
|
"eval_precision": 0.8778361344537815, |
|
"eval_recall": 0.8815693762502272, |
|
"eval_runtime": 4.9774, |
|
"eval_samples_per_second": 80.162, |
|
"eval_steps_per_second": 10.045, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 15.949886322021484, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1038, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8651222336500356, |
|
"eval_loss": 0.41882890462875366, |
|
"eval_precision": 0.8518339768339769, |
|
"eval_recall": 0.8866612111292962, |
|
"eval_runtime": 5.0099, |
|
"eval_samples_per_second": 79.643, |
|
"eval_steps_per_second": 9.98, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.7783217430114746, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.072, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.6270534992218018, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 5.0204, |
|
"eval_samples_per_second": 79.476, |
|
"eval_steps_per_second": 9.959, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 44.57243347167969, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0462, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8695225637671682, |
|
"eval_loss": 0.6129250526428223, |
|
"eval_precision": 0.8631532846715328, |
|
"eval_recall": 0.8769776322967813, |
|
"eval_runtime": 4.9643, |
|
"eval_samples_per_second": 80.375, |
|
"eval_steps_per_second": 10.072, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.024074144661426544, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0459, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8649122807017544, |
|
"eval_loss": 0.5890637636184692, |
|
"eval_precision": 0.8710116366366366, |
|
"eval_recall": 0.8594744498999818, |
|
"eval_runtime": 4.9832, |
|
"eval_samples_per_second": 80.069, |
|
"eval_steps_per_second": 10.034, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.022918157279491425, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0391, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8680720368560659, |
|
"eval_loss": 0.5972921252250671, |
|
"eval_precision": 0.8587217615098657, |
|
"eval_recall": 0.8802054919076197, |
|
"eval_runtime": 5.0003, |
|
"eval_samples_per_second": 79.795, |
|
"eval_steps_per_second": 9.999, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 106.23094177246094, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0307, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8584865509022812, |
|
"eval_loss": 0.7086873054504395, |
|
"eval_precision": 0.8441043083900227, |
|
"eval_recall": 0.8863429714493545, |
|
"eval_runtime": 4.9859, |
|
"eval_samples_per_second": 80.026, |
|
"eval_steps_per_second": 10.028, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.024997469037771225, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0199, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8717112228173498, |
|
"eval_loss": 0.7264124155044556, |
|
"eval_precision": 0.8869295958279009, |
|
"eval_recall": 0.8597926895799237, |
|
"eval_runtime": 4.9651, |
|
"eval_samples_per_second": 80.361, |
|
"eval_steps_per_second": 10.07, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.004392046481370926, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0105, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8757339815412664, |
|
"eval_loss": 0.6738360524177551, |
|
"eval_precision": 0.8766906299500427, |
|
"eval_recall": 0.8747954173486088, |
|
"eval_runtime": 5.0179, |
|
"eval_samples_per_second": 79.516, |
|
"eval_steps_per_second": 9.964, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.004026818089187145, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0131, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.882467302933899, |
|
"eval_loss": 0.7488105297088623, |
|
"eval_precision": 0.8732988802756245, |
|
"eval_recall": 0.8940716493907983, |
|
"eval_runtime": 5.0007, |
|
"eval_samples_per_second": 79.788, |
|
"eval_steps_per_second": 9.999, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.004543425515294075, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0102, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8792560061999484, |
|
"eval_loss": 0.7154756784439087, |
|
"eval_precision": 0.8707622232472325, |
|
"eval_recall": 0.889798145117294, |
|
"eval_runtime": 5.0136, |
|
"eval_samples_per_second": 79.584, |
|
"eval_steps_per_second": 9.973, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.0037931231781840324, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0061, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8894993300948346, |
|
"eval_loss": 0.7196279168128967, |
|
"eval_precision": 0.8850535598035154, |
|
"eval_recall": 0.8943898890707401, |
|
"eval_runtime": 5.009, |
|
"eval_samples_per_second": 79.657, |
|
"eval_steps_per_second": 9.982, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.0027608012314885855, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0138, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.884617951284618, |
|
"eval_loss": 0.7618029713630676, |
|
"eval_precision": 0.8772893772893773, |
|
"eval_recall": 0.8933442444080741, |
|
"eval_runtime": 5.0251, |
|
"eval_samples_per_second": 79.401, |
|
"eval_steps_per_second": 9.95, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0022813216783106327, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0075, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8873149414352814, |
|
"eval_loss": 0.7252941727638245, |
|
"eval_precision": 0.8806277372262774, |
|
"eval_recall": 0.8951172940534643, |
|
"eval_runtime": 5.0138, |
|
"eval_samples_per_second": 79.581, |
|
"eval_steps_per_second": 9.973, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 15.638340950012207, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0063, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8840781602687784, |
|
"eval_loss": 0.7560042142868042, |
|
"eval_precision": 0.87816715542522, |
|
"eval_recall": 0.89084378977996, |
|
"eval_runtime": 5.0052, |
|
"eval_samples_per_second": 79.718, |
|
"eval_steps_per_second": 9.99, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.002121408935636282, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0066, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8856624319419237, |
|
"eval_loss": 0.748332142829895, |
|
"eval_precision": 0.8758364312267658, |
|
"eval_recall": 0.8983451536643026, |
|
"eval_runtime": 4.9788, |
|
"eval_samples_per_second": 80.139, |
|
"eval_steps_per_second": 10.043, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.004570267163217068, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0023, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.884617951284618, |
|
"eval_loss": 0.7535205483436584, |
|
"eval_precision": 0.8772893772893773, |
|
"eval_recall": 0.8933442444080741, |
|
"eval_runtime": 5.0013, |
|
"eval_samples_per_second": 79.779, |
|
"eval_steps_per_second": 9.997, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0018295175395905972, |
|
"learning_rate": 0.0, |
|
"loss": 0.0021, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8878351186601172, |
|
"eval_loss": 0.7535876035690308, |
|
"eval_precision": 0.879776516905975, |
|
"eval_recall": 0.8976177486815784, |
|
"eval_runtime": 5.0187, |
|
"eval_samples_per_second": 79.503, |
|
"eval_steps_per_second": 9.963, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.06173487283655855, |
|
"train_runtime": 2765.1299, |
|
"train_samples_per_second": 26.313, |
|
"train_steps_per_second": 0.882 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|