{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.458889961242676, "learning_rate": 4.75e-05, "loss": 0.5533, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7268170426065163, "eval_f1": 0.6326680574676724, "eval_loss": 0.5133728384971619, "eval_precision": 0.6605799373040753, "eval_recall": 0.6242044008001455, "eval_runtime": 1.774, "eval_samples_per_second": 224.915, "eval_steps_per_second": 28.185, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.8158535957336426, "learning_rate": 4.5e-05, "loss": 0.4779, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7418546365914787, "eval_f1": 0.7122401394791937, "eval_loss": 0.4949621260166168, "eval_precision": 0.7053803339517626, "eval_recall": 0.734860883797054, "eval_runtime": 1.7746, "eval_samples_per_second": 224.844, "eval_steps_per_second": 28.176, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.156679630279541, "learning_rate": 4.25e-05, "loss": 0.4097, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.7664715719063545, "eval_loss": 0.3772188425064087, "eval_precision": 0.8092877840475827, "eval_recall": 0.7458628841607565, "eval_runtime": 1.8118, "eval_samples_per_second": 220.228, "eval_steps_per_second": 27.597, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.239713191986084, "learning_rate": 4e-05, "loss": 0.3451, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8169941409717701, "eval_loss": 0.3511227071285248, "eval_precision": 0.8104735988883742, "eval_recall": 0.8250591016548463, "eval_runtime": 1.8097, "eval_samples_per_second": 220.479, "eval_steps_per_second": 27.629, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.155226707458496, "learning_rate": 3.7500000000000003e-05, "loss": 0.2959, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8255172205802521, "eval_loss": 0.32013869285583496, "eval_precision": 0.8239495798319327, "eval_recall": 0.8271503909801782, "eval_runtime": 1.81, "eval_samples_per_second": 220.445, "eval_steps_per_second": 27.625, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.97943115234375, "learning_rate": 3.5e-05, "loss": 0.2727, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8447157518450185, "eval_loss": 0.3176342844963074, "eval_precision": 0.8325401217487549, "eval_recall": 0.864248045099109, "eval_runtime": 1.8128, "eval_samples_per_second": 220.097, "eval_steps_per_second": 27.581, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.0954539775848389, "learning_rate": 3.2500000000000004e-05, "loss": 0.2595, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8524146298159436, "eval_loss": 0.2958522439002991, "eval_precision": 0.8451250578971746, "eval_recall": 0.8613384251682124, "eval_runtime": 1.8171, "eval_samples_per_second": 219.582, "eval_steps_per_second": 27.516, "step": 854 }, { "epoch": 8.0, "grad_norm": 6.516312122344971, "learning_rate": 3e-05, "loss": 0.2409, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8649122807017544, "eval_loss": 0.28329744935035706, "eval_precision": 0.8710116366366366, "eval_recall": 0.8594744498999818, "eval_runtime": 1.8171, "eval_samples_per_second": 219.577, "eval_steps_per_second": 27.516, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.709987163543701, "learning_rate": 2.7500000000000004e-05, "loss": 0.2298, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.2893889546394348, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 1.8128, "eval_samples_per_second": 220.099, "eval_steps_per_second": 27.581, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.345912933349609, "learning_rate": 2.5e-05, "loss": 0.2221, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8614765038536611, "eval_loss": 0.2884393632411957, "eval_precision": 0.8686536646744258, "eval_recall": 0.8552009456264775, "eval_runtime": 1.8154, "eval_samples_per_second": 219.786, "eval_steps_per_second": 27.542, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.3872387409210205, "learning_rate": 2.25e-05, "loss": 0.1986, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8587719298245614, "eval_loss": 0.2855367362499237, "eval_precision": 0.864771021021021, "eval_recall": 0.8534278959810875, "eval_runtime": 1.8165, "eval_samples_per_second": 219.656, "eval_steps_per_second": 27.526, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.007177829742432, "learning_rate": 2e-05, "loss": 0.1964, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8521068445832446, "eval_loss": 0.29210031032562256, "eval_precision": 0.8693800752624282, "eval_recall": 0.8391525731951264, "eval_runtime": 1.8152, "eval_samples_per_second": 219.815, "eval_steps_per_second": 27.546, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.4582099914550781, "learning_rate": 1.75e-05, "loss": 0.1783, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8649122807017544, "eval_loss": 0.3103856146335602, "eval_precision": 0.8710116366366366, "eval_recall": 0.8594744498999818, "eval_runtime": 1.8138, "eval_samples_per_second": 219.982, "eval_steps_per_second": 27.567, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.738508701324463, "learning_rate": 1.5e-05, "loss": 0.1788, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8689068100358424, "eval_loss": 0.3015482425689697, "eval_precision": 0.863953693884765, "eval_recall": 0.8744771776686671, "eval_runtime": 1.8152, "eval_samples_per_second": 219.806, "eval_steps_per_second": 27.545, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.6257522106170654, "learning_rate": 1.25e-05, "loss": 0.172, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8595070422535211, "eval_loss": 0.3011764883995056, "eval_precision": 0.8633733523114054, "eval_recall": 0.8559283506092017, "eval_runtime": 1.8131, "eval_samples_per_second": 220.062, "eval_steps_per_second": 27.577, "step": 1830 }, { "epoch": 16.0, "grad_norm": 4.563363552093506, "learning_rate": 1e-05, "loss": 0.1563, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8695225637671682, "eval_loss": 0.3159307837486267, "eval_precision": 0.8631532846715328, "eval_recall": 0.8769776322967813, "eval_runtime": 1.8172, "eval_samples_per_second": 219.574, "eval_steps_per_second": 27.516, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.206107258796692, "learning_rate": 7.5e-06, "loss": 0.1512, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8572517421602788, "eval_loss": 0.32489535212516785, "eval_precision": 0.8679426449878376, "eval_recall": 0.8484269867248591, "eval_runtime": 1.8131, "eval_samples_per_second": 220.063, "eval_steps_per_second": 27.577, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.8656744956970215, "learning_rate": 5e-06, "loss": 0.151, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.3245084285736084, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.8159, "eval_samples_per_second": 219.726, "eval_steps_per_second": 27.535, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.600020170211792, "learning_rate": 2.5e-06, "loss": 0.1461, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8614765038536611, "eval_loss": 0.328171044588089, "eval_precision": 0.8686536646744258, "eval_recall": 0.8552009456264775, "eval_runtime": 1.8138, "eval_samples_per_second": 219.976, "eval_steps_per_second": 27.566, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.394913673400879, "learning_rate": 0.0, "loss": 0.1555, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.32483023405075073, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.8125, "eval_samples_per_second": 220.138, "eval_steps_per_second": 27.586, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2495564101172275, "train_runtime": 624.4298, "train_samples_per_second": 116.522, "train_steps_per_second": 3.908 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }