{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.800751686096191, "learning_rate": 4.75e-05, "loss": 0.5455, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.6639335808580858, "eval_loss": 0.48765844106674194, "eval_precision": 0.7053086419753087, "eval_recall": 0.6512093107837789, "eval_runtime": 1.7695, "eval_samples_per_second": 225.487, "eval_steps_per_second": 28.257, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.828364849090576, "learning_rate": 4.5e-05, "loss": 0.4356, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8209674617878647, "eval_loss": 0.35374370217323303, "eval_precision": 0.810288627625787, "eval_recall": 0.8375613747954174, "eval_runtime": 1.7671, "eval_samples_per_second": 225.791, "eval_steps_per_second": 28.295, "step": 244 }, { "epoch": 3.0, "grad_norm": 7.537293910980225, "learning_rate": 4.25e-05, "loss": 0.3468, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8072960267885891, "eval_loss": 0.3415985107421875, "eval_precision": 0.8325657894736842, "eval_recall": 0.7910983815239134, "eval_runtime": 1.7644, "eval_samples_per_second": 226.134, "eval_steps_per_second": 28.338, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.427431106567383, "learning_rate": 4e-05, "loss": 0.3049, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8180088078011953, "eval_loss": 0.3125925660133362, "eval_precision": 0.8323930726843348, "eval_recall": 0.8071467539552646, "eval_runtime": 1.7656, "eval_samples_per_second": 225.985, "eval_steps_per_second": 28.319, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.060295820236206, "learning_rate": 3.7500000000000003e-05, "loss": 0.2673, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8315338681464504, "eval_loss": 0.29185813665390015, "eval_precision": 0.8299369747899159, "eval_recall": 0.8331969448990726, "eval_runtime": 1.7632, "eval_samples_per_second": 226.299, "eval_steps_per_second": 28.358, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.6104700565338135, "learning_rate": 3.5e-05, "loss": 0.2516, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8350734843845683, "eval_loss": 0.2822675108909607, "eval_precision": 0.8386812302741506, "eval_recall": 0.8317421349336243, "eval_runtime": 1.761, "eval_samples_per_second": 226.577, "eval_steps_per_second": 28.393, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.407837837934494, "learning_rate": 3.2500000000000004e-05, "loss": 0.2243, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.26883605122566223, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 1.7626, "eval_samples_per_second": 226.37, "eval_steps_per_second": 28.367, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.6773055791854858, "learning_rate": 3e-05, "loss": 0.2157, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8696646341463414, "eval_loss": 0.2641172409057617, "eval_precision": 0.8807130969146075, "eval_recall": 0.8605200945626478, "eval_runtime": 1.7614, "eval_samples_per_second": 226.526, "eval_steps_per_second": 28.387, "step": 976 }, { "epoch": 9.0, "grad_norm": 3.9705936908721924, "learning_rate": 2.7500000000000004e-05, "loss": 0.2052, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8572517421602788, "eval_loss": 0.26267343759536743, "eval_precision": 0.8679426449878376, "eval_recall": 0.8484269867248591, "eval_runtime": 1.7598, "eval_samples_per_second": 226.73, "eval_steps_per_second": 28.412, "step": 1098 }, { "epoch": 10.0, "grad_norm": 13.646815299987793, "learning_rate": 2.5e-05, "loss": 0.1864, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8548402404302435, "eval_loss": 0.288084477186203, "eval_precision": 0.8737291514386338, "eval_recall": 0.8409256228405164, "eval_runtime": 1.7593, "eval_samples_per_second": 226.79, "eval_steps_per_second": 28.42, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.454102039337158, "learning_rate": 2.25e-05, "loss": 0.1928, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.867476104365797, "eval_loss": 0.2784619927406311, "eval_precision": 0.8592596863468634, "eval_recall": 0.8777050372795053, "eval_runtime": 1.7608, "eval_samples_per_second": 226.608, "eval_steps_per_second": 28.397, "step": 1342 }, { "epoch": 12.0, "grad_norm": 4.11415958404541, "learning_rate": 2e-05, "loss": 0.1804, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8758710801393728, "eval_loss": 0.2506408393383026, "eval_precision": 0.8870983228779925, "eval_recall": 0.8665666484815421, "eval_runtime": 1.7598, "eval_samples_per_second": 226.734, "eval_steps_per_second": 28.413, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.9822263717651367, "learning_rate": 1.75e-05, "loss": 0.1654, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8790689216221131, "eval_loss": 0.26640743017196655, "eval_precision": 0.8790689216221131, "eval_recall": 0.8790689216221131, "eval_runtime": 1.7621, "eval_samples_per_second": 226.429, "eval_steps_per_second": 28.375, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.247542381286621, "learning_rate": 1.5e-05, "loss": 0.1567, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8878351186601172, "eval_loss": 0.2660583257675171, "eval_precision": 0.879776516905975, "eval_recall": 0.8976177486815784, "eval_runtime": 1.7643, "eval_samples_per_second": 226.146, "eval_steps_per_second": 28.339, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1535786390304565, "learning_rate": 1.25e-05, "loss": 0.1438, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9097744360902256, "eval_f1": 0.8917003438084323, "eval_loss": 0.2614538073539734, "eval_precision": 0.8898109243697478, "eval_recall": 0.893662484088016, "eval_runtime": 1.7625, "eval_samples_per_second": 226.385, "eval_steps_per_second": 28.369, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.5205868482589722, "learning_rate": 1e-05, "loss": 0.1472, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8856836962422341, "eval_loss": 0.25552284717559814, "eval_precision": 0.8838235294117647, "eval_recall": 0.8876159301691217, "eval_runtime": 1.7611, "eval_samples_per_second": 226.564, "eval_steps_per_second": 28.392, "step": 1952 }, { "epoch": 17.0, "grad_norm": 6.205023288726807, "learning_rate": 7.5e-06, "loss": 0.1394, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8790689216221131, "eval_loss": 0.2647987902164459, "eval_precision": 0.8790689216221131, "eval_recall": 0.8790689216221131, "eval_runtime": 1.7605, "eval_samples_per_second": 226.638, "eval_steps_per_second": 28.401, "step": 2074 }, { "epoch": 18.0, "grad_norm": 1.5985056161880493, "learning_rate": 5e-06, "loss": 0.1387, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8862394957983193, "eval_loss": 0.26299694180488586, "eval_precision": 0.8826476009275225, "eval_recall": 0.8901163847972358, "eval_runtime": 1.7607, "eval_samples_per_second": 226.62, "eval_steps_per_second": 28.399, "step": 2196 }, { "epoch": 19.0, "grad_norm": 1.7635153532028198, "learning_rate": 2.5e-06, "loss": 0.1378, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8845345436822225, "eval_loss": 0.26893848180770874, "eval_precision": 0.8865278628291489, "eval_recall": 0.8826150209128933, "eval_runtime": 1.7631, "eval_samples_per_second": 226.305, "eval_steps_per_second": 28.359, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.3988428115844727, "learning_rate": 0.0, "loss": 0.1365, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8817957385392532, "eval_loss": 0.2682853937149048, "eval_precision": 0.8827677592299257, "eval_recall": 0.8808419712675032, "eval_runtime": 1.7608, "eval_samples_per_second": 226.596, "eval_steps_per_second": 28.396, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.22609603365913766, "train_runtime": 618.465, "train_samples_per_second": 117.646, "train_steps_per_second": 3.945 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }