{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.803397178649902, "learning_rate": 4.75e-05, "loss": 0.5411, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7368421052631579, "eval_f1": 0.6508662716567915, "eval_loss": 0.49393221735954285, "eval_precision": 0.6761904761904762, "eval_recall": 0.6412984178941625, "eval_runtime": 1.7881, "eval_samples_per_second": 223.142, "eval_steps_per_second": 27.963, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.498361587524414, "learning_rate": 4.5e-05, "loss": 0.4231, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.7995262704565029, "eval_loss": 0.3851858377456665, "eval_precision": 0.7887596899224806, "eval_recall": 0.8183760683760684, "eval_runtime": 1.7893, "eval_samples_per_second": 222.988, "eval_steps_per_second": 27.943, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.0573930740356445, "learning_rate": 4.25e-05, "loss": 0.3331, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8080535642463387, "eval_loss": 0.33130019903182983, "eval_precision": 0.8232818379877204, "eval_recall": 0.796826695762866, "eval_runtime": 1.7976, "eval_samples_per_second": 221.96, "eval_steps_per_second": 27.815, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.250720500946045, "learning_rate": 4e-05, "loss": 0.2924, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.30566585063934326, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 1.7957, "eval_samples_per_second": 222.195, "eval_steps_per_second": 27.844, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.8374593257904053, "learning_rate": 3.7500000000000003e-05, "loss": 0.2705, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8422176526415692, "eval_loss": 0.3068975508213043, "eval_precision": 0.8604724566416373, "eval_recall": 0.8288325150027278, "eval_runtime": 1.7956, "eval_samples_per_second": 222.215, "eval_steps_per_second": 27.846, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.305149078369141, "learning_rate": 3.5e-05, "loss": 0.2461, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8562182887453875, "eval_loss": 0.31193241477012634, "eval_precision": 0.8435805201992252, "eval_recall": 0.8763411529368976, "eval_runtime": 1.7944, "eval_samples_per_second": 222.356, "eval_steps_per_second": 27.864, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.4272942841053009, "learning_rate": 3.2500000000000004e-05, "loss": 0.2313, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8662440310793597, "eval_loss": 0.28799474239349365, "eval_precision": 0.8606158357771261, "eval_recall": 0.872704128023277, "eval_runtime": 1.8006, "eval_samples_per_second": 221.595, "eval_steps_per_second": 27.769, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.898254871368408, "learning_rate": 3e-05, "loss": 0.2183, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8676331036823873, "eval_loss": 0.27734559774398804, "eval_precision": 0.8749292230261088, "eval_recall": 0.8612474995453718, "eval_runtime": 1.7976, "eval_samples_per_second": 221.957, "eval_steps_per_second": 27.814, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.726850509643555, "learning_rate": 2.7500000000000004e-05, "loss": 0.2093, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8587719298245614, "eval_loss": 0.28041473031044006, "eval_precision": 0.864771021021021, "eval_recall": 0.8534278959810875, "eval_runtime": 1.7999, "eval_samples_per_second": 221.684, "eval_steps_per_second": 27.78, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.7861063480377197, "learning_rate": 2.5e-05, "loss": 0.1986, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8654532336864889, "eval_loss": 0.28901827335357666, "eval_precision": 0.8804269882659713, "eval_recall": 0.8537461356610292, "eval_runtime": 1.7942, "eval_samples_per_second": 222.384, "eval_steps_per_second": 27.868, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.18681001663208, "learning_rate": 2.25e-05, "loss": 0.1881, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8629148629148629, "eval_loss": 0.29107582569122314, "eval_precision": 0.8657894736842104, "eval_recall": 0.860201854882706, "eval_runtime": 1.8131, "eval_samples_per_second": 220.068, "eval_steps_per_second": 27.577, "step": 1342 }, { "epoch": 12.0, "grad_norm": 3.137617588043213, "learning_rate": 2e-05, "loss": 0.1802, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.28662246465682983, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 1.8223, "eval_samples_per_second": 218.951, "eval_steps_per_second": 27.437, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.8551347851753235, "learning_rate": 1.75e-05, "loss": 0.169, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8564658408408408, "eval_loss": 0.2963576018810272, "eval_precision": 0.8697278911564625, "eval_recall": 0.8459265320967448, "eval_runtime": 1.8327, "eval_samples_per_second": 217.71, "eval_steps_per_second": 27.282, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.66387414932251, "learning_rate": 1.5e-05, "loss": 0.1709, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8629148629148629, "eval_loss": 0.29438090324401855, "eval_precision": 0.8657894736842104, "eval_recall": 0.860201854882706, "eval_runtime": 1.8299, "eval_samples_per_second": 218.043, "eval_steps_per_second": 27.324, "step": 1708 }, { "epoch": 15.0, "grad_norm": 3.363886833190918, "learning_rate": 1.25e-05, "loss": 0.1492, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8636104675452922, "eval_loss": 0.28655046224594116, "eval_precision": 0.8645363713902765, "eval_recall": 0.8627023095108202, "eval_runtime": 1.8286, "eval_samples_per_second": 218.201, "eval_steps_per_second": 27.344, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.6481318473815918, "learning_rate": 1e-05, "loss": 0.1493, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8742647058823529, "eval_loss": 0.2950553596019745, "eval_precision": 0.8707860158154468, "eval_recall": 0.8780232769594472, "eval_runtime": 1.8286, "eval_samples_per_second": 218.205, "eval_steps_per_second": 27.344, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.7666211724281311, "learning_rate": 7.5e-06, "loss": 0.1425, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8710526315789473, "eval_loss": 0.304831326007843, "eval_precision": 0.8772522522522522, "eval_recall": 0.8655210038188761, "eval_runtime": 1.7983, "eval_samples_per_second": 221.876, "eval_steps_per_second": 27.804, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.819899797439575, "learning_rate": 5e-06, "loss": 0.1375, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8790689216221131, "eval_loss": 0.298705130815506, "eval_precision": 0.8790689216221131, "eval_recall": 0.8790689216221131, "eval_runtime": 1.8038, "eval_samples_per_second": 221.2, "eval_steps_per_second": 27.719, "step": 2196 }, { "epoch": 19.0, "grad_norm": 1.7430284023284912, "learning_rate": 2.5e-06, "loss": 0.1326, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8778322106552358, "eval_loss": 0.30734923481941223, "eval_precision": 0.8818924438393465, "eval_recall": 0.8740680123658847, "eval_runtime": 1.8064, "eval_samples_per_second": 220.886, "eval_steps_per_second": 27.68, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.931983709335327, "learning_rate": 0.0, "loss": 0.1365, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8784574144023395, "eval_loss": 0.29978305101394653, "eval_precision": 0.8804194733619106, "eval_recall": 0.8765684669939988, "eval_runtime": 1.8111, "eval_samples_per_second": 220.313, "eval_steps_per_second": 27.608, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2259816083751741, "train_runtime": 638.5228, "train_samples_per_second": 113.951, "train_steps_per_second": 3.821 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }