{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.279976844787598, "learning_rate": 4.75e-05, "loss": 0.564, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7142857142857143, "eval_f1": 0.6246286393345217, "eval_loss": 0.520961344242096, "eval_precision": 0.6432360742705571, "eval_recall": 0.6178396072013094, "eval_runtime": 1.7251, "eval_samples_per_second": 231.293, "eval_steps_per_second": 28.984, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.9077770709991455, "learning_rate": 4.5e-05, "loss": 0.5007, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7518796992481203, "eval_f1": 0.7122952431589911, "eval_loss": 0.4797453284263611, "eval_precision": 0.7062235989862011, "eval_recall": 0.7219494453537008, "eval_runtime": 1.727, "eval_samples_per_second": 231.043, "eval_steps_per_second": 28.953, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.219292640686035, "learning_rate": 4.25e-05, "loss": 0.428, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.7933804817423211, "eval_loss": 0.39089536666870117, "eval_precision": 0.7873726262158407, "eval_recall": 0.800872885979269, "eval_runtime": 1.7391, "eval_samples_per_second": 229.423, "eval_steps_per_second": 28.75, "step": 366 }, { "epoch": 4.0, "grad_norm": 5.223276615142822, "learning_rate": 4e-05, "loss": 0.3751, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8151164115613962, "eval_loss": 0.34778618812561035, "eval_precision": 0.8159193371512123, "eval_recall": 0.8143298781596654, "eval_runtime": 1.7324, "eval_samples_per_second": 230.322, "eval_steps_per_second": 28.862, "step": 488 }, { "epoch": 5.0, "grad_norm": 5.300344467163086, "learning_rate": 3.7500000000000003e-05, "loss": 0.339, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8280070158749033, "eval_loss": 0.33690375089645386, "eval_precision": 0.8223795620437956, "eval_recall": 0.8346517548645208, "eval_runtime": 1.7412, "eval_samples_per_second": 229.147, "eval_steps_per_second": 28.715, "step": 610 }, { "epoch": 6.0, "grad_norm": 6.1557698249816895, "learning_rate": 3.5e-05, "loss": 0.3096, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.3206353187561035, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 1.7364, "eval_samples_per_second": 229.787, "eval_steps_per_second": 28.795, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.458906412124634, "learning_rate": 3.2500000000000004e-05, "loss": 0.2931, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8306935047100303, "eval_loss": 0.31404000520706177, "eval_precision": 0.8372758729160114, "eval_recall": 0.8249681760320058, "eval_runtime": 1.74, "eval_samples_per_second": 229.312, "eval_steps_per_second": 28.736, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.627723217010498, "learning_rate": 3e-05, "loss": 0.2765, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8461962888779714, "eval_loss": 0.30450862646102905, "eval_precision": 0.8453465227094517, "eval_recall": 0.8470631023822512, "eval_runtime": 1.739, "eval_samples_per_second": 229.443, "eval_steps_per_second": 28.752, "step": 976 }, { "epoch": 9.0, "grad_norm": 3.7235734462738037, "learning_rate": 2.7500000000000004e-05, "loss": 0.2637, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8556004584112431, "eval_loss": 0.3003471791744232, "eval_precision": 0.8538865546218487, "eval_recall": 0.85738316057465, "eval_runtime": 1.7315, "eval_samples_per_second": 230.44, "eval_steps_per_second": 28.877, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.832447052001953, "learning_rate": 2.5e-05, "loss": 0.2601, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8548827059465357, "eval_loss": 0.2909964919090271, "eval_precision": 0.8548827059465357, "eval_recall": 0.8548827059465357, "eval_runtime": 1.7405, "eval_samples_per_second": 229.242, "eval_steps_per_second": 28.727, "step": 1220 }, { "epoch": 11.0, "grad_norm": 12.791139602661133, "learning_rate": 2.25e-05, "loss": 0.2547, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8641933287950987, "eval_loss": 0.28499358892440796, "eval_precision": 0.872603606453654, "eval_recall": 0.8569739952718676, "eval_runtime": 1.7358, "eval_samples_per_second": 229.86, "eval_steps_per_second": 28.804, "step": 1342 }, { "epoch": 12.0, "grad_norm": 9.72183895111084, "learning_rate": 2e-05, "loss": 0.2426, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8696722245432793, "eval_loss": 0.279752641916275, "eval_precision": 0.8706135006701596, "eval_recall": 0.8687488634297145, "eval_runtime": 1.7375, "eval_samples_per_second": 229.634, "eval_steps_per_second": 28.776, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.6714916229248047, "learning_rate": 1.75e-05, "loss": 0.2319, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8661961395983623, "eval_loss": 0.28113481402397156, "eval_precision": 0.8784532165625604, "eval_recall": 0.8562465902891435, "eval_runtime": 1.7421, "eval_samples_per_second": 229.035, "eval_steps_per_second": 28.701, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.962621688842773, "learning_rate": 1.5e-05, "loss": 0.2359, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.2719880938529968, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 1.7325, "eval_samples_per_second": 230.305, "eval_steps_per_second": 28.86, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.756800413131714, "learning_rate": 1.25e-05, "loss": 0.2229, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8736504011098378, "eval_loss": 0.2721886932849884, "eval_precision": 0.8718487394957983, "eval_recall": 0.8755228223313329, "eval_runtime": 1.7352, "eval_samples_per_second": 229.939, "eval_steps_per_second": 28.814, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.3028435707092285, "learning_rate": 1e-05, "loss": 0.2218, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8649563392675828, "eval_loss": 0.2730589210987091, "eval_precision": 0.8623655913978494, "eval_recall": 0.8677032187670486, "eval_runtime": 1.7334, "eval_samples_per_second": 230.181, "eval_steps_per_second": 28.845, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.6590169668197632, "learning_rate": 7.5e-06, "loss": 0.2174, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8696722245432793, "eval_loss": 0.27383190393447876, "eval_precision": 0.8706135006701596, "eval_recall": 0.8687488634297145, "eval_runtime": 1.738, "eval_samples_per_second": 229.575, "eval_steps_per_second": 28.769, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.2556533813476562, "learning_rate": 5e-06, "loss": 0.2165, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8703223612108386, "eval_loss": 0.27394846081733704, "eval_precision": 0.8694131129742446, "eval_recall": 0.8712493180578287, "eval_runtime": 1.738, "eval_samples_per_second": 229.571, "eval_steps_per_second": 28.768, "step": 2196 }, { "epoch": 19.0, "grad_norm": 7.895025253295898, "learning_rate": 2.5e-06, "loss": 0.2153, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8751002084335417, "eval_loss": 0.2726791203022003, "eval_precision": 0.8780701754385964, "eval_recall": 0.8722949627204946, "eval_runtime": 1.7412, "eval_samples_per_second": 229.156, "eval_steps_per_second": 28.716, "step": 2318 }, { "epoch": 20.0, "grad_norm": 4.127798080444336, "learning_rate": 0.0, "loss": 0.2159, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8717238211879976, "eval_loss": 0.2725962698459625, "eval_precision": 0.8757194133300328, "eval_recall": 0.8680214584469903, "eval_runtime": 1.7378, "eval_samples_per_second": 229.603, "eval_steps_per_second": 28.772, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7635661149264000.0, "train_loss": 0.29423871978384547, "train_runtime": 626.8339, "train_samples_per_second": 116.075, "train_steps_per_second": 3.893 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7635661149264000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }