|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.279976844787598, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.564, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_f1": 0.6246286393345217, |
|
"eval_loss": 0.520961344242096, |
|
"eval_precision": 0.6432360742705571, |
|
"eval_recall": 0.6178396072013094, |
|
"eval_runtime": 1.7251, |
|
"eval_samples_per_second": 231.293, |
|
"eval_steps_per_second": 28.984, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.9077770709991455, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5007, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7518796992481203, |
|
"eval_f1": 0.7122952431589911, |
|
"eval_loss": 0.4797453284263611, |
|
"eval_precision": 0.7062235989862011, |
|
"eval_recall": 0.7219494453537008, |
|
"eval_runtime": 1.727, |
|
"eval_samples_per_second": 231.043, |
|
"eval_steps_per_second": 28.953, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.219292640686035, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.428, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7933804817423211, |
|
"eval_loss": 0.39089536666870117, |
|
"eval_precision": 0.7873726262158407, |
|
"eval_recall": 0.800872885979269, |
|
"eval_runtime": 1.7391, |
|
"eval_samples_per_second": 229.423, |
|
"eval_steps_per_second": 28.75, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.223276615142822, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3751, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8151164115613962, |
|
"eval_loss": 0.34778618812561035, |
|
"eval_precision": 0.8159193371512123, |
|
"eval_recall": 0.8143298781596654, |
|
"eval_runtime": 1.7324, |
|
"eval_samples_per_second": 230.322, |
|
"eval_steps_per_second": 28.862, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.300344467163086, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.339, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8280070158749033, |
|
"eval_loss": 0.33690375089645386, |
|
"eval_precision": 0.8223795620437956, |
|
"eval_recall": 0.8346517548645208, |
|
"eval_runtime": 1.7412, |
|
"eval_samples_per_second": 229.147, |
|
"eval_steps_per_second": 28.715, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 6.1557698249816895, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3096, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.844327731092437, |
|
"eval_loss": 0.3206353187561035, |
|
"eval_precision": 0.8411320530352577, |
|
"eval_recall": 0.8477905073649754, |
|
"eval_runtime": 1.7364, |
|
"eval_samples_per_second": 229.787, |
|
"eval_steps_per_second": 28.795, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.458906412124634, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2931, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8306935047100303, |
|
"eval_loss": 0.31404000520706177, |
|
"eval_precision": 0.8372758729160114, |
|
"eval_recall": 0.8249681760320058, |
|
"eval_runtime": 1.74, |
|
"eval_samples_per_second": 229.312, |
|
"eval_steps_per_second": 28.736, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.627723217010498, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2765, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8461962888779714, |
|
"eval_loss": 0.30450862646102905, |
|
"eval_precision": 0.8453465227094517, |
|
"eval_recall": 0.8470631023822512, |
|
"eval_runtime": 1.739, |
|
"eval_samples_per_second": 229.443, |
|
"eval_steps_per_second": 28.752, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.7235734462738037, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2637, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8556004584112431, |
|
"eval_loss": 0.3003471791744232, |
|
"eval_precision": 0.8538865546218487, |
|
"eval_recall": 0.85738316057465, |
|
"eval_runtime": 1.7315, |
|
"eval_samples_per_second": 230.44, |
|
"eval_steps_per_second": 28.877, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.832447052001953, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2601, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8548827059465357, |
|
"eval_loss": 0.2909964919090271, |
|
"eval_precision": 0.8548827059465357, |
|
"eval_recall": 0.8548827059465357, |
|
"eval_runtime": 1.7405, |
|
"eval_samples_per_second": 229.242, |
|
"eval_steps_per_second": 28.727, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 12.791139602661133, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2547, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8641933287950987, |
|
"eval_loss": 0.28499358892440796, |
|
"eval_precision": 0.872603606453654, |
|
"eval_recall": 0.8569739952718676, |
|
"eval_runtime": 1.7358, |
|
"eval_samples_per_second": 229.86, |
|
"eval_steps_per_second": 28.804, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.72183895111084, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2426, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.279752641916275, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 1.7375, |
|
"eval_samples_per_second": 229.634, |
|
"eval_steps_per_second": 28.776, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.6714916229248047, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2319, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8661961395983623, |
|
"eval_loss": 0.28113481402397156, |
|
"eval_precision": 0.8784532165625604, |
|
"eval_recall": 0.8562465902891435, |
|
"eval_runtime": 1.7421, |
|
"eval_samples_per_second": 229.035, |
|
"eval_steps_per_second": 28.701, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.962621688842773, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2359, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.2719880938529968, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 1.7325, |
|
"eval_samples_per_second": 230.305, |
|
"eval_steps_per_second": 28.86, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.756800413131714, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2229, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8736504011098378, |
|
"eval_loss": 0.2721886932849884, |
|
"eval_precision": 0.8718487394957983, |
|
"eval_recall": 0.8755228223313329, |
|
"eval_runtime": 1.7352, |
|
"eval_samples_per_second": 229.939, |
|
"eval_steps_per_second": 28.814, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.3028435707092285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2218, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8649563392675828, |
|
"eval_loss": 0.2730589210987091, |
|
"eval_precision": 0.8623655913978494, |
|
"eval_recall": 0.8677032187670486, |
|
"eval_runtime": 1.7334, |
|
"eval_samples_per_second": 230.181, |
|
"eval_steps_per_second": 28.845, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.6590169668197632, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2174, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.27383190393447876, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 1.738, |
|
"eval_samples_per_second": 229.575, |
|
"eval_steps_per_second": 28.769, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.2556533813476562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2165, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8703223612108386, |
|
"eval_loss": 0.27394846081733704, |
|
"eval_precision": 0.8694131129742446, |
|
"eval_recall": 0.8712493180578287, |
|
"eval_runtime": 1.738, |
|
"eval_samples_per_second": 229.571, |
|
"eval_steps_per_second": 28.768, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 7.895025253295898, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2153, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8751002084335417, |
|
"eval_loss": 0.2726791203022003, |
|
"eval_precision": 0.8780701754385964, |
|
"eval_recall": 0.8722949627204946, |
|
"eval_runtime": 1.7412, |
|
"eval_samples_per_second": 229.156, |
|
"eval_steps_per_second": 28.716, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.127798080444336, |
|
"learning_rate": 0.0, |
|
"loss": 0.2159, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8717238211879976, |
|
"eval_loss": 0.2725962698459625, |
|
"eval_precision": 0.8757194133300328, |
|
"eval_recall": 0.8680214584469903, |
|
"eval_runtime": 1.7378, |
|
"eval_samples_per_second": 229.603, |
|
"eval_steps_per_second": 28.772, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7635661149264000.0, |
|
"train_loss": 0.29423871978384547, |
|
"train_runtime": 626.8339, |
|
"train_samples_per_second": 116.075, |
|
"train_steps_per_second": 3.893 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7635661149264000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|