{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.094232082366943, "learning_rate": 4.75e-05, "loss": 0.5608, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6526816032372154, "eval_loss": 0.5057180523872375, "eval_precision": 0.6593383311603651, "eval_recall": 0.6481633024186215, "eval_runtime": 5.1312, "eval_samples_per_second": 77.76, "eval_steps_per_second": 9.744, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.818559408187866, "learning_rate": 4.5e-05, "loss": 0.5012, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7518796992481203, "eval_f1": 0.7192637077573647, "eval_loss": 0.47916004061698914, "eval_precision": 0.7116519573339108, "eval_recall": 0.7369521731223858, "eval_runtime": 5.0532, "eval_samples_per_second": 78.96, "eval_steps_per_second": 9.895, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.010869026184082, "learning_rate": 4.25e-05, "loss": 0.4628, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7694235588972431, "eval_f1": 0.7320583941605839, "eval_loss": 0.4281270503997803, "eval_precision": 0.7255639097744361, "eval_recall": 0.7418621567557737, "eval_runtime": 5.0512, "eval_samples_per_second": 78.991, "eval_steps_per_second": 9.899, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.6188292503356934, "learning_rate": 4e-05, "loss": 0.4045, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7764637262952703, "eval_loss": 0.39506053924560547, "eval_precision": 0.7802700348432056, "eval_recall": 0.7730496453900709, "eval_runtime": 5.0698, "eval_samples_per_second": 78.701, "eval_steps_per_second": 9.862, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.2819700241088867, "learning_rate": 3.7500000000000003e-05, "loss": 0.3701, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7994987468671679, "eval_f1": 0.7735784814436499, "eval_loss": 0.42390376329421997, "eval_precision": 0.7633219954648527, "eval_recall": 0.7956446626659393, "eval_runtime": 5.0535, "eval_samples_per_second": 78.955, "eval_steps_per_second": 9.894, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.4958856105804443, "learning_rate": 3.5e-05, "loss": 0.3362, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7974014336917563, "eval_loss": 0.3720650374889374, "eval_precision": 0.7934340756451043, "eval_recall": 0.801918530641935, "eval_runtime": 5.0487, "eval_samples_per_second": 79.03, "eval_steps_per_second": 9.903, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.6802586317062378, "learning_rate": 3.2500000000000004e-05, "loss": 0.3285, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.8077810218978102, "eval_loss": 0.372498095035553, "eval_precision": 0.7988721804511278, "eval_recall": 0.8204673577014002, "eval_runtime": 5.052, "eval_samples_per_second": 78.979, "eval_steps_per_second": 9.897, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.754953384399414, "learning_rate": 3e-05, "loss": 0.3061, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8109388749746158, "eval_loss": 0.3537139594554901, "eval_precision": 0.8087365591397849, "eval_recall": 0.8132842334969994, "eval_runtime": 5.0749, "eval_samples_per_second": 78.622, "eval_steps_per_second": 9.852, "step": 976 }, { "epoch": 9.0, "grad_norm": 19.56822395324707, "learning_rate": 2.7500000000000004e-05, "loss": 0.3017, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8109388749746158, "eval_loss": 0.3503880798816681, "eval_precision": 0.8087365591397849, "eval_recall": 0.8132842334969994, "eval_runtime": 5.1242, "eval_samples_per_second": 77.865, "eval_steps_per_second": 9.758, "step": 1098 }, { "epoch": 10.0, "grad_norm": 7.07460880279541, "learning_rate": 2.5e-05, "loss": 0.2942, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8186033824331697, "eval_loss": 0.33907514810562134, "eval_precision": 0.8186033824331697, "eval_recall": 0.8186033824331697, "eval_runtime": 5.0538, "eval_samples_per_second": 78.951, "eval_steps_per_second": 9.894, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.607100486755371, "learning_rate": 2.25e-05, "loss": 0.2715, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.821236559139785, "eval_loss": 0.34563252329826355, "eval_precision": 0.8169406150583245, "eval_recall": 0.8261047463175123, "eval_runtime": 5.0829, "eval_samples_per_second": 78.498, "eval_steps_per_second": 9.837, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.076610565185547, "learning_rate": 2e-05, "loss": 0.2703, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8262464479462671, "eval_loss": 0.353447824716568, "eval_precision": 0.8190008071955719, "eval_recall": 0.8353791598472449, "eval_runtime": 5.0811, "eval_samples_per_second": 78.526, "eval_steps_per_second": 9.84, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.735930323600769, "learning_rate": 1.75e-05, "loss": 0.2759, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8202661535994868, "eval_loss": 0.332623153924942, "eval_precision": 0.8228070175438597, "eval_recall": 0.8178759774504456, "eval_runtime": 5.0743, "eval_samples_per_second": 78.631, "eval_steps_per_second": 9.854, "step": 1586 }, { "epoch": 14.0, "grad_norm": 11.147435188293457, "learning_rate": 1.5e-05, "loss": 0.2705, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8289446964056049, "eval_loss": 0.33598676323890686, "eval_precision": 0.8266129032258065, "eval_recall": 0.8314238952536825, "eval_runtime": 5.046, "eval_samples_per_second": 79.073, "eval_steps_per_second": 9.909, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.3279173374176025, "learning_rate": 1.25e-05, "loss": 0.2576, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.839868600986979, "eval_loss": 0.3422936797142029, "eval_precision": 0.834029197080292, "eval_recall": 0.8467448627023095, "eval_runtime": 5.0825, "eval_samples_per_second": 78.505, "eval_steps_per_second": 9.838, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.688598155975342, "learning_rate": 1e-05, "loss": 0.2513, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8313646980313647, "eval_loss": 0.33944278955459595, "eval_precision": 0.825091575091575, "eval_recall": 0.8389252591380251, "eval_runtime": 5.0581, "eval_samples_per_second": 78.883, "eval_steps_per_second": 9.885, "step": 1952 }, { "epoch": 17.0, "grad_norm": 8.046936988830566, "learning_rate": 7.5e-06, "loss": 0.2481, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8281017346283209, "eval_loss": 0.32614341378211975, "eval_precision": 0.8272965800108572, "eval_recall": 0.8289234406255683, "eval_runtime": 5.1172, "eval_samples_per_second": 77.973, "eval_steps_per_second": 9.771, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.366418361663818, "learning_rate": 5e-06, "loss": 0.2561, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.3320069909095764, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 5.0629, "eval_samples_per_second": 78.808, "eval_steps_per_second": 9.876, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.5271737575531006, "learning_rate": 2.5e-06, "loss": 0.2478, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8349466368826011, "eval_loss": 0.3268921673297882, "eval_precision": 0.8325716845878136, "eval_recall": 0.8374704491725768, "eval_runtime": 5.0569, "eval_samples_per_second": 78.901, "eval_steps_per_second": 9.887, "step": 2318 }, { "epoch": 20.0, "grad_norm": 10.424647331237793, "learning_rate": 0.0, "loss": 0.2451, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8357422474382676, "eval_loss": 0.32743164896965027, "eval_precision": 0.8319228265372551, "eval_recall": 0.8399709038006911, "eval_runtime": 5.068, "eval_samples_per_second": 78.729, "eval_steps_per_second": 9.866, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.32301358082255377, "train_runtime": 1951.5809, "train_samples_per_second": 37.283, "train_steps_per_second": 1.25 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }