|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.300418853759766, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.4092, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.7892045810686176, |
|
"eval_loss": 0.3456897437572479, |
|
"eval_precision": 0.8929804104477612, |
|
"eval_recall": 0.7553646117475905, |
|
"eval_runtime": 1.6637, |
|
"eval_samples_per_second": 239.829, |
|
"eval_steps_per_second": 30.054, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 9.286004066467285, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2282, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8676331036823873, |
|
"eval_loss": 0.258427232503891, |
|
"eval_precision": 0.8749292230261088, |
|
"eval_recall": 0.8612474995453718, |
|
"eval_runtime": 1.6592, |
|
"eval_samples_per_second": 240.478, |
|
"eval_steps_per_second": 30.135, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 13.956122398376465, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.138, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8429546050905274, |
|
"eval_loss": 0.44172462821006775, |
|
"eval_precision": 0.882529902138456, |
|
"eval_recall": 0.8198763411529368, |
|
"eval_runtime": 1.6567, |
|
"eval_samples_per_second": 240.84, |
|
"eval_steps_per_second": 30.18, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.09012622386217117, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0837, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8793019197207679, |
|
"eval_loss": 0.4037090241909027, |
|
"eval_precision": 0.8893184421534936, |
|
"eval_recall": 0.8708401527550463, |
|
"eval_runtime": 1.6737, |
|
"eval_samples_per_second": 238.388, |
|
"eval_steps_per_second": 29.873, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.347772121429443, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0426, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8873149414352814, |
|
"eval_loss": 0.5462044477462769, |
|
"eval_precision": 0.8806277372262774, |
|
"eval_recall": 0.8951172940534643, |
|
"eval_runtime": 1.6751, |
|
"eval_samples_per_second": 238.188, |
|
"eval_steps_per_second": 29.848, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.011684279888868332, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0502, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8707140332272888, |
|
"eval_loss": 0.5626452565193176, |
|
"eval_precision": 0.8618432385874246, |
|
"eval_recall": 0.8819785415530097, |
|
"eval_runtime": 1.6815, |
|
"eval_samples_per_second": 237.294, |
|
"eval_steps_per_second": 29.736, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.015737071633338928, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0242, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8848664457009163, |
|
"eval_loss": 0.6241247653961182, |
|
"eval_precision": 0.8977236138837015, |
|
"eval_recall": 0.8743862520458265, |
|
"eval_runtime": 1.6765, |
|
"eval_samples_per_second": 238.002, |
|
"eval_steps_per_second": 29.825, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.004997015465050936, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0217, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8692251105268142, |
|
"eval_loss": 0.7096332907676697, |
|
"eval_precision": 0.8579132638693325, |
|
"eval_recall": 0.885206401163848, |
|
"eval_runtime": 1.6742, |
|
"eval_samples_per_second": 238.324, |
|
"eval_steps_per_second": 29.865, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.004219838418066502, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0229, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9122807017543859, |
|
"eval_f1": 0.8954723392788977, |
|
"eval_loss": 0.611499547958374, |
|
"eval_precision": 0.8909569746108776, |
|
"eval_recall": 0.9004364429896345, |
|
"eval_runtime": 1.66, |
|
"eval_samples_per_second": 240.365, |
|
"eval_steps_per_second": 30.121, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.006534805987030268, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0109, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8744522298370696, |
|
"eval_loss": 0.7575166821479797, |
|
"eval_precision": 0.8795731707317074, |
|
"eval_recall": 0.8697945080923805, |
|
"eval_runtime": 1.6781, |
|
"eval_samples_per_second": 237.769, |
|
"eval_steps_per_second": 29.796, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.00184684619307518, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0068, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8861029031685659, |
|
"eval_loss": 0.7536790370941162, |
|
"eval_precision": 0.8937558980811576, |
|
"eval_recall": 0.879387161302055, |
|
"eval_runtime": 1.6764, |
|
"eval_samples_per_second": 238.006, |
|
"eval_steps_per_second": 29.825, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.01189060416072607, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0131, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8775533117267087, |
|
"eval_loss": 0.7247006297111511, |
|
"eval_precision": 0.873246730188791, |
|
"eval_recall": 0.8822967812329514, |
|
"eval_runtime": 1.6752, |
|
"eval_samples_per_second": 238.178, |
|
"eval_steps_per_second": 29.847, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.0015333497431129217, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0101, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8763538792940554, |
|
"eval_loss": 0.7927835583686829, |
|
"eval_precision": 0.8754297605404427, |
|
"eval_recall": 0.877295871976723, |
|
"eval_runtime": 1.6839, |
|
"eval_samples_per_second": 236.944, |
|
"eval_steps_per_second": 29.692, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.0037907068617641926, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0061, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8884169154604891, |
|
"eval_loss": 0.784883975982666, |
|
"eval_precision": 0.8874630556728391, |
|
"eval_recall": 0.8893889798145117, |
|
"eval_runtime": 1.6745, |
|
"eval_samples_per_second": 238.284, |
|
"eval_steps_per_second": 29.86, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.0031544596422463655, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0135, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8731122745782431, |
|
"eval_loss": 0.781574010848999, |
|
"eval_precision": 0.8829705994654449, |
|
"eval_recall": 0.864793598836152, |
|
"eval_runtime": 1.6721, |
|
"eval_samples_per_second": 238.624, |
|
"eval_steps_per_second": 29.903, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0015803646529093385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0081, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8757339815412664, |
|
"eval_loss": 0.7727145552635193, |
|
"eval_precision": 0.8766906299500427, |
|
"eval_recall": 0.8747954173486088, |
|
"eval_runtime": 1.6771, |
|
"eval_samples_per_second": 237.909, |
|
"eval_steps_per_second": 29.813, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.00223415601067245, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0027, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8763538792940554, |
|
"eval_loss": 0.81281578540802, |
|
"eval_precision": 0.8754297605404427, |
|
"eval_recall": 0.877295871976723, |
|
"eval_runtime": 1.6614, |
|
"eval_samples_per_second": 240.156, |
|
"eval_steps_per_second": 30.095, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0023393542505800724, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0041, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8817957385392532, |
|
"eval_loss": 0.8081415891647339, |
|
"eval_precision": 0.8827677592299257, |
|
"eval_recall": 0.8808419712675032, |
|
"eval_runtime": 1.6587, |
|
"eval_samples_per_second": 240.553, |
|
"eval_steps_per_second": 30.145, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0010473760776221752, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0018, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8793019197207679, |
|
"eval_loss": 0.8038576245307922, |
|
"eval_precision": 0.8893184421534936, |
|
"eval_recall": 0.8708401527550463, |
|
"eval_runtime": 1.6566, |
|
"eval_samples_per_second": 240.849, |
|
"eval_steps_per_second": 30.182, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0015798051608726382, |
|
"learning_rate": 0.0, |
|
"loss": 0.0025, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8799463033398397, |
|
"eval_loss": 0.803022563457489, |
|
"eval_precision": 0.8874803397294746, |
|
"eval_recall": 0.8733406073831607, |
|
"eval_runtime": 1.6611, |
|
"eval_samples_per_second": 240.198, |
|
"eval_steps_per_second": 30.1, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.05501617935226589, |
|
"train_runtime": 867.5592, |
|
"train_samples_per_second": 83.867, |
|
"train_steps_per_second": 2.812 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|