{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.92190408706665, "learning_rate": 4.75e-05, "loss": 0.5593, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.731829573934837, "eval_f1": 0.6531872202500386, "eval_loss": 0.5129385590553284, "eval_precision": 0.6696820563659538, "eval_recall": 0.6452536824877251, "eval_runtime": 1.7112, "eval_samples_per_second": 233.172, "eval_steps_per_second": 29.22, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.805196285247803, "learning_rate": 4.5e-05, "loss": 0.481, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.7054243048977545, "eval_loss": 0.4831171929836273, "eval_precision": 0.6993341053850608, "eval_recall": 0.7295417348608838, "eval_runtime": 1.7107, "eval_samples_per_second": 233.24, "eval_steps_per_second": 29.228, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.332752227783203, "learning_rate": 4.25e-05, "loss": 0.4234, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8220551378446115, "eval_f1": 0.7739917826798037, "eval_loss": 0.3973781168460846, "eval_precision": 0.7925961082107262, "eval_recall": 0.7615930169121659, "eval_runtime": 1.7083, "eval_samples_per_second": 233.57, "eval_steps_per_second": 29.269, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.6373705863952637, "learning_rate": 4e-05, "loss": 0.3701, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7991821327461466, "eval_loss": 0.3780345618724823, "eval_precision": 0.8127623983206507, "eval_recall": 0.7890070921985816, "eval_runtime": 1.708, "eval_samples_per_second": 233.603, "eval_steps_per_second": 29.274, "step": 488 }, { "epoch": 5.0, "grad_norm": 8.633732795715332, "learning_rate": 3.7500000000000003e-05, "loss": 0.3499, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.819530641752864, "eval_loss": 0.3612293004989624, "eval_precision": 0.8134920634920635, "eval_recall": 0.8268321513002364, "eval_runtime": 1.706, "eval_samples_per_second": 233.875, "eval_steps_per_second": 29.308, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.133713960647583, "learning_rate": 3.5e-05, "loss": 0.3165, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.8072288313015957, "eval_loss": 0.3760314881801605, "eval_precision": 0.7952690166975882, "eval_recall": 0.8376523004182579, "eval_runtime": 1.7076, "eval_samples_per_second": 233.655, "eval_steps_per_second": 29.28, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.4740520715713501, "learning_rate": 3.2500000000000004e-05, "loss": 0.2968, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8419946387230413, "eval_loss": 0.33418920636177063, "eval_precision": 0.8437691365584814, "eval_recall": 0.8402891434806329, "eval_runtime": 1.708, "eval_samples_per_second": 233.608, "eval_steps_per_second": 29.274, "step": 854 }, { "epoch": 8.0, "grad_norm": 5.327579021453857, "learning_rate": 3e-05, "loss": 0.2812, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8350789627607721, "eval_loss": 0.3310687243938446, "eval_precision": 0.8463358876939919, "eval_recall": 0.8260138206946717, "eval_runtime": 1.707, "eval_samples_per_second": 233.739, "eval_steps_per_second": 29.291, "step": 976 }, { "epoch": 9.0, "grad_norm": 8.512028694152832, "learning_rate": 2.7500000000000004e-05, "loss": 0.2682, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8454251965513313, "eval_loss": 0.32693037390708923, "eval_precision": 0.8463049835506276, "eval_recall": 0.8445626477541371, "eval_runtime": 1.7093, "eval_samples_per_second": 233.423, "eval_steps_per_second": 29.251, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.219644546508789, "learning_rate": 2.5e-05, "loss": 0.2596, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8541488972828073, "eval_loss": 0.3144831955432892, "eval_precision": 0.8559859154929578, "eval_recall": 0.8523822513184216, "eval_runtime": 1.7104, "eval_samples_per_second": 233.278, "eval_steps_per_second": 29.233, "step": 1220 }, { "epoch": 11.0, "grad_norm": 6.501763820648193, "learning_rate": 2.25e-05, "loss": 0.2464, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8377439939939939, "eval_loss": 0.31378793716430664, "eval_precision": 0.8503401360544218, "eval_recall": 0.8277868703400618, "eval_runtime": 1.7091, "eval_samples_per_second": 233.452, "eval_steps_per_second": 29.255, "step": 1342 }, { "epoch": 12.0, "grad_norm": 12.380316734313965, "learning_rate": 2e-05, "loss": 0.2415, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8564658408408408, "eval_loss": 0.31256482005119324, "eval_precision": 0.8697278911564625, "eval_recall": 0.8459265320967448, "eval_runtime": 1.71, "eval_samples_per_second": 233.331, "eval_steps_per_second": 29.239, "step": 1464 }, { "epoch": 13.0, "grad_norm": 4.9941325187683105, "learning_rate": 1.75e-05, "loss": 0.2354, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8521068445832446, "eval_loss": 0.3136414587497711, "eval_precision": 0.8693800752624282, "eval_recall": 0.8391525731951264, "eval_runtime": 1.7127, "eval_samples_per_second": 232.961, "eval_steps_per_second": 29.193, "step": 1586 }, { "epoch": 14.0, "grad_norm": 12.018842697143555, "learning_rate": 1.5e-05, "loss": 0.2303, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8510304659498208, "eval_loss": 0.3171806037425995, "eval_precision": 0.8463237893248498, "eval_recall": 0.8563375159119839, "eval_runtime": 1.711, "eval_samples_per_second": 233.193, "eval_steps_per_second": 29.222, "step": 1708 }, { "epoch": 15.0, "grad_norm": 3.3128843307495117, "learning_rate": 1.25e-05, "loss": 0.2172, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8537492688633261, "eval_loss": 0.3120233416557312, "eval_precision": 0.8656062850151329, "eval_recall": 0.8441534824513548, "eval_runtime": 1.7092, "eval_samples_per_second": 233.436, "eval_steps_per_second": 29.253, "step": 1830 }, { "epoch": 16.0, "grad_norm": 5.912248611450195, "learning_rate": 1e-05, "loss": 0.2159, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8357422474382676, "eval_loss": 0.31162190437316895, "eval_precision": 0.8319228265372551, "eval_recall": 0.8399709038006911, "eval_runtime": 1.7092, "eval_samples_per_second": 233.439, "eval_steps_per_second": 29.253, "step": 1952 }, { "epoch": 17.0, "grad_norm": 8.530476570129395, "learning_rate": 7.5e-06, "loss": 0.2192, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8556621579112929, "eval_loss": 0.31227871775627136, "eval_precision": 0.871654421411703, "eval_recall": 0.8434260774686306, "eval_runtime": 1.7098, "eval_samples_per_second": 233.357, "eval_steps_per_second": 29.243, "step": 2074 }, { "epoch": 18.0, "grad_norm": 7.491436958312988, "learning_rate": 5e-06, "loss": 0.2124, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.839868600986979, "eval_loss": 0.31498095393180847, "eval_precision": 0.834029197080292, "eval_recall": 0.8467448627023095, "eval_runtime": 1.7079, "eval_samples_per_second": 233.62, "eval_steps_per_second": 29.276, "step": 2196 }, { "epoch": 19.0, "grad_norm": 5.7076263427734375, "learning_rate": 2.5e-06, "loss": 0.2077, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8526315789473684, "eval_loss": 0.3084200918674469, "eval_precision": 0.8585304054054055, "eval_recall": 0.8473813420621932, "eval_runtime": 1.7079, "eval_samples_per_second": 233.62, "eval_steps_per_second": 29.276, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.0706095695495605, "learning_rate": 0.0, "loss": 0.205, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.3077850043773651, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.7073, "eval_samples_per_second": 233.696, "eval_steps_per_second": 29.285, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7662265464912000.0, "train_loss": 0.2918538148285913, "train_runtime": 616.0393, "train_samples_per_second": 118.109, "train_steps_per_second": 3.961 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7662265464912000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }