{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.96969747543335, "learning_rate": 4.75e-05, "loss": 0.5526, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7142857142857143, "eval_f1": 0.6246286393345217, "eval_loss": 0.5089075565338135, "eval_precision": 0.6432360742705571, "eval_recall": 0.6178396072013094, "eval_runtime": 5.1308, "eval_samples_per_second": 77.766, "eval_steps_per_second": 9.745, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.059352159500122, "learning_rate": 4.5e-05, "loss": 0.4578, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7994987468671679, "eval_f1": 0.771804403774664, "eval_loss": 0.4193674325942993, "eval_precision": 0.7619509396853147, "eval_recall": 0.7906437534097108, "eval_runtime": 5.0426, "eval_samples_per_second": 79.125, "eval_steps_per_second": 9.915, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.294614553451538, "learning_rate": 4.25e-05, "loss": 0.3632, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7866573438078395, "eval_loss": 0.3468063175678253, "eval_precision": 0.8221645717626425, "eval_recall": 0.7672304055282779, "eval_runtime": 5.0451, "eval_samples_per_second": 79.086, "eval_steps_per_second": 9.911, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.894381046295166, "learning_rate": 4e-05, "loss": 0.3063, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8589543987905864, "eval_loss": 0.2975314259529114, "eval_precision": 0.8564068100358423, "eval_recall": 0.8616566648481543, "eval_runtime": 5.045, "eval_samples_per_second": 79.087, "eval_steps_per_second": 9.911, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.5420114994049072, "learning_rate": 3.7500000000000003e-05, "loss": 0.2771, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8524204953403198, "eval_loss": 0.3194977939128876, "eval_precision": 0.8409052784611943, "eval_recall": 0.8695671940352792, "eval_runtime": 5.0554, "eval_samples_per_second": 78.926, "eval_steps_per_second": 9.89, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.310750961303711, "learning_rate": 3.5e-05, "loss": 0.2463, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8536612749904566, "eval_loss": 0.28968048095703125, "eval_precision": 0.8496330709593418, "eval_recall": 0.8581105655573741, "eval_runtime": 5.0495, "eval_samples_per_second": 79.017, "eval_steps_per_second": 9.902, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.563045024871826, "learning_rate": 3.2500000000000004e-05, "loss": 0.2317, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.27178412675857544, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 5.0509, "eval_samples_per_second": 78.995, "eval_steps_per_second": 9.899, "step": 854 }, { "epoch": 8.0, "grad_norm": 1.676988959312439, "learning_rate": 3e-05, "loss": 0.2182, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8654398826979472, "eval_loss": 0.26825374364852905, "eval_precision": 0.8556454369374502, "eval_recall": 0.8784324422622295, "eval_runtime": 5.0526, "eval_samples_per_second": 78.969, "eval_steps_per_second": 9.896, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.442895412445068, "learning_rate": 2.7500000000000004e-05, "loss": 0.2065, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.27725929021835327, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 5.0455, "eval_samples_per_second": 79.08, "eval_steps_per_second": 9.91, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.095264434814453, "learning_rate": 2.5e-05, "loss": 0.2012, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.852937255424767, "eval_loss": 0.28407934308052063, "eval_precision": 0.8674217731421121, "eval_recall": 0.8416530278232406, "eval_runtime": 5.0513, "eval_samples_per_second": 78.99, "eval_steps_per_second": 9.898, "step": 1220 }, { "epoch": 11.0, "grad_norm": 7.4546098709106445, "learning_rate": 2.25e-05, "loss": 0.1944, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.27333080768585205, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 5.0526, "eval_samples_per_second": 78.968, "eval_steps_per_second": 9.896, "step": 1342 }, { "epoch": 12.0, "grad_norm": 8.82780647277832, "learning_rate": 2e-05, "loss": 0.176, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8724195749658803, "eval_loss": 0.27092599868774414, "eval_precision": 0.8848766823362741, "eval_recall": 0.8622931442080378, "eval_runtime": 5.0447, "eval_samples_per_second": 79.094, "eval_steps_per_second": 9.911, "step": 1464 }, { "epoch": 13.0, "grad_norm": 3.990729331970215, "learning_rate": 1.75e-05, "loss": 0.168, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8736504011098378, "eval_loss": 0.26514801383018494, "eval_precision": 0.8718487394957983, "eval_recall": 0.8755228223313329, "eval_runtime": 5.0943, "eval_samples_per_second": 78.323, "eval_steps_per_second": 9.815, "step": 1586 }, { "epoch": 14.0, "grad_norm": 2.973822593688965, "learning_rate": 1.5e-05, "loss": 0.1644, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8727838950061173, "eval_loss": 0.2710956037044525, "eval_precision": 0.8656898656898657, "eval_recall": 0.8812511365702855, "eval_runtime": 5.0742, "eval_samples_per_second": 78.633, "eval_steps_per_second": 9.854, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.690046548843384, "learning_rate": 1.25e-05, "loss": 0.1541, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8683279483657071, "eval_loss": 0.2790246605873108, "eval_precision": 0.873366724738676, "eval_recall": 0.863747954173486, "eval_runtime": 5.055, "eval_samples_per_second": 78.932, "eval_steps_per_second": 9.891, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.9607453346252441, "learning_rate": 1e-05, "loss": 0.1574, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8656154317207594, "eval_loss": 0.27665647864341736, "eval_precision": 0.869546382820719, "eval_recall": 0.861974904528096, "eval_runtime": 5.06, "eval_samples_per_second": 78.853, "eval_steps_per_second": 9.881, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.14830851554870605, "learning_rate": 7.5e-06, "loss": 0.1508, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8676337535436396, "eval_loss": 0.2825946509838104, "eval_precision": 0.8658613445378152, "eval_recall": 0.8694762684124386, "eval_runtime": 5.0609, "eval_samples_per_second": 78.84, "eval_steps_per_second": 9.88, "step": 2074 }, { "epoch": 18.0, "grad_norm": 1.0990123748779297, "learning_rate": 5e-06, "loss": 0.1463, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8614765038536611, "eval_loss": 0.2823769748210907, "eval_precision": 0.8686536646744258, "eval_recall": 0.8552009456264775, "eval_runtime": 5.0926, "eval_samples_per_second": 78.349, "eval_steps_per_second": 9.818, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.10962895303964615, "learning_rate": 2.5e-06, "loss": 0.1467, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8572517421602788, "eval_loss": 0.2876257598400116, "eval_precision": 0.8679426449878376, "eval_recall": 0.8484269867248591, "eval_runtime": 5.0519, "eval_samples_per_second": 78.981, "eval_steps_per_second": 9.897, "step": 2318 }, { "epoch": 20.0, "grad_norm": 4.516838550567627, "learning_rate": 0.0, "loss": 0.1399, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8649122807017544, "eval_loss": 0.2823803424835205, "eval_precision": 0.8710116366366366, "eval_recall": 0.8594744498999818, "eval_runtime": 5.0556, "eval_samples_per_second": 78.922, "eval_steps_per_second": 9.89, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.23294218016452475, "train_runtime": 1905.4788, "train_samples_per_second": 38.185, "train_steps_per_second": 1.281 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }