|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.331399440765381, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3182, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9571745015998031, |
|
"eval_f1": 0.7968127490039841, |
|
"eval_loss": 0.12841814756393433, |
|
"eval_precision": 0.746268656716418, |
|
"eval_recall": 0.8547008547008547, |
|
"eval_runtime": 0.966, |
|
"eval_samples_per_second": 193.575, |
|
"eval_steps_per_second": 3.105, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.366861581802368, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1137, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9561900073837066, |
|
"eval_f1": 0.7927927927927929, |
|
"eval_loss": 0.13024382293224335, |
|
"eval_precision": 0.7230046948356808, |
|
"eval_recall": 0.8774928774928775, |
|
"eval_runtime": 0.9793, |
|
"eval_samples_per_second": 190.957, |
|
"eval_steps_per_second": 3.063, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.3602290153503418, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0683, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9606202313561408, |
|
"eval_f1": 0.8174386920980926, |
|
"eval_loss": 0.12490106374025345, |
|
"eval_precision": 0.783289817232376, |
|
"eval_recall": 0.8547008547008547, |
|
"eval_runtime": 0.9992, |
|
"eval_samples_per_second": 187.145, |
|
"eval_steps_per_second": 3.002, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.569642961025238, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0454, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9591434900319961, |
|
"eval_f1": 0.8233731739707835, |
|
"eval_loss": 0.1463680863380432, |
|
"eval_precision": 0.7711442786069652, |
|
"eval_recall": 0.8831908831908832, |
|
"eval_runtime": 0.9458, |
|
"eval_samples_per_second": 197.714, |
|
"eval_steps_per_second": 3.172, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.9764404892921448, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0325, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9640659611124784, |
|
"eval_f1": 0.8401084010840107, |
|
"eval_loss": 0.15568402409553528, |
|
"eval_precision": 0.8010335917312662, |
|
"eval_recall": 0.8831908831908832, |
|
"eval_runtime": 0.9702, |
|
"eval_samples_per_second": 192.747, |
|
"eval_steps_per_second": 3.092, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.7467350363731384, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0211, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9598818606940684, |
|
"eval_f1": 0.8411214953271028, |
|
"eval_loss": 0.2112356424331665, |
|
"eval_precision": 0.7914572864321608, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9492, |
|
"eval_samples_per_second": 196.999, |
|
"eval_steps_per_second": 3.16, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.3930013179779053, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.015, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9606202313561408, |
|
"eval_f1": 0.8295904887714664, |
|
"eval_loss": 0.19438758492469788, |
|
"eval_precision": 0.7733990147783252, |
|
"eval_recall": 0.8945868945868946, |
|
"eval_runtime": 0.9407, |
|
"eval_samples_per_second": 198.787, |
|
"eval_steps_per_second": 3.189, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5287325978279114, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0113, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9665271966527197, |
|
"eval_f1": 0.8536585365853658, |
|
"eval_loss": 0.21514002978801727, |
|
"eval_precision": 0.813953488372093, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9327, |
|
"eval_samples_per_second": 200.498, |
|
"eval_steps_per_second": 3.217, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.272814005613327, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0075, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9684961850849126, |
|
"eval_f1": 0.8536585365853658, |
|
"eval_loss": 0.199550062417984, |
|
"eval_precision": 0.813953488372093, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9509, |
|
"eval_samples_per_second": 196.649, |
|
"eval_steps_per_second": 3.155, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.9517960548400879, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0067, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9684961850849126, |
|
"eval_f1": 0.8647140864714087, |
|
"eval_loss": 0.2077295184135437, |
|
"eval_precision": 0.8469945355191257, |
|
"eval_recall": 0.8831908831908832, |
|
"eval_runtime": 1.0115, |
|
"eval_samples_per_second": 184.866, |
|
"eval_steps_per_second": 2.966, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.23543691635131836, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0039, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9579128722618755, |
|
"eval_f1": 0.823841059602649, |
|
"eval_loss": 0.2609161138534546, |
|
"eval_precision": 0.7698019801980198, |
|
"eval_recall": 0.886039886039886, |
|
"eval_runtime": 0.9537, |
|
"eval_samples_per_second": 196.081, |
|
"eval_steps_per_second": 3.146, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.22459489107131958, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0028, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9648043317745508, |
|
"eval_f1": 0.8590971272229823, |
|
"eval_loss": 0.24980689585208893, |
|
"eval_precision": 0.8263157894736842, |
|
"eval_recall": 0.8945868945868946, |
|
"eval_runtime": 0.9427, |
|
"eval_samples_per_second": 198.361, |
|
"eval_steps_per_second": 3.182, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.4156136512756348, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0035, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9643120846665025, |
|
"eval_f1": 0.8493150684931507, |
|
"eval_loss": 0.24070732295513153, |
|
"eval_precision": 0.8179419525065963, |
|
"eval_recall": 0.8831908831908832, |
|
"eval_runtime": 0.9468, |
|
"eval_samples_per_second": 197.508, |
|
"eval_steps_per_second": 3.169, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.004436641000211239, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.003, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.963081466896382, |
|
"eval_f1": 0.8375838926174497, |
|
"eval_loss": 0.2474687099456787, |
|
"eval_precision": 0.7918781725888325, |
|
"eval_recall": 0.8888888888888888, |
|
"eval_runtime": 0.9445, |
|
"eval_samples_per_second": 197.991, |
|
"eval_steps_per_second": 3.176, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.024274416267871857, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0016, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9640659611124784, |
|
"eval_f1": 0.8445040214477211, |
|
"eval_loss": 0.25520774722099304, |
|
"eval_precision": 0.7974683544303798, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9554, |
|
"eval_samples_per_second": 195.735, |
|
"eval_steps_per_second": 3.14, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.18013718724250793, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0016, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9665271966527197, |
|
"eval_f1": 0.8606557377049181, |
|
"eval_loss": 0.24627996981143951, |
|
"eval_precision": 0.8267716535433071, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9538, |
|
"eval_samples_per_second": 196.059, |
|
"eval_steps_per_second": 3.145, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.005411619320511818, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0012, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9665271966527197, |
|
"eval_f1": 0.861072902338377, |
|
"eval_loss": 0.25002872943878174, |
|
"eval_precision": 0.8324468085106383, |
|
"eval_recall": 0.8917378917378918, |
|
"eval_runtime": 0.9536, |
|
"eval_samples_per_second": 196.089, |
|
"eval_steps_per_second": 3.146, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.02285461686551571, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0009, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.965296578882599, |
|
"eval_f1": 0.8586956521739131, |
|
"eval_loss": 0.26289603114128113, |
|
"eval_precision": 0.8207792207792208, |
|
"eval_recall": 0.9002849002849003, |
|
"eval_runtime": 0.9478, |
|
"eval_samples_per_second": 197.295, |
|
"eval_steps_per_second": 3.165, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.004692568443715572, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0014, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9650504553285749, |
|
"eval_f1": 0.8559782608695653, |
|
"eval_loss": 0.26193881034851074, |
|
"eval_precision": 0.8181818181818182, |
|
"eval_recall": 0.8974358974358975, |
|
"eval_runtime": 0.9587, |
|
"eval_samples_per_second": 195.056, |
|
"eval_steps_per_second": 3.129, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.07399484515190125, |
|
"learning_rate": 0.0, |
|
"loss": 0.0006, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9650504553285749, |
|
"eval_f1": 0.8555858310626703, |
|
"eval_loss": 0.2606537342071533, |
|
"eval_precision": 0.8198433420365535, |
|
"eval_recall": 0.8945868945868946, |
|
"eval_runtime": 0.958, |
|
"eval_samples_per_second": 195.202, |
|
"eval_steps_per_second": 3.132, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 903894941334000.0, |
|
"train_loss": 0.03300303453052381, |
|
"train_runtime": 513.1215, |
|
"train_samples_per_second": 65.793, |
|
"train_steps_per_second": 4.132 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 903894941334000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|