{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.9568660259246826, "learning_rate": 4.75e-05, "loss": 0.5556, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.6899266862170088, "eval_loss": 0.5147875547409058, "eval_precision": 0.6851714708898257, "eval_recall": 0.6970358246953992, "eval_runtime": 1.7157, "eval_samples_per_second": 232.562, "eval_steps_per_second": 29.143, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.367062091827393, "learning_rate": 4.5e-05, "loss": 0.476, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7869674185463659, "eval_f1": 0.7095448122296921, "eval_loss": 0.4507494866847992, "eval_precision": 0.7577399380804953, "eval_recall": 0.6917621385706492, "eval_runtime": 1.7126, "eval_samples_per_second": 232.973, "eval_steps_per_second": 29.195, "step": 244 }, { "epoch": 3.0, "grad_norm": 8.358736991882324, "learning_rate": 4.25e-05, "loss": 0.4238, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8195488721804511, "eval_f1": 0.7644319076357912, "eval_loss": 0.4003293514251709, "eval_precision": 0.7957593330916999, "eval_recall": 0.7473176941262047, "eval_runtime": 1.7133, "eval_samples_per_second": 232.886, "eval_steps_per_second": 29.184, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.034170627593994, "learning_rate": 4e-05, "loss": 0.3735, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.796615353247018, "eval_loss": 0.379879891872406, "eval_precision": 0.8088983050847458, "eval_recall": 0.7872340425531914, "eval_runtime": 1.7115, "eval_samples_per_second": 233.13, "eval_steps_per_second": 29.214, "step": 488 }, { "epoch": 5.0, "grad_norm": 4.402078151702881, "learning_rate": 3.7500000000000003e-05, "loss": 0.3548, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8160386984618873, "eval_loss": 0.3634377121925354, "eval_precision": 0.8152632848784607, "eval_recall": 0.8168303327877796, "eval_runtime": 1.7124, "eval_samples_per_second": 233.013, "eval_steps_per_second": 29.2, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.384060382843018, "learning_rate": 3.5e-05, "loss": 0.3213, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8136136136136136, "eval_loss": 0.35842451453208923, "eval_precision": 0.8076923076923077, "eval_recall": 0.820785597381342, "eval_runtime": 1.7164, "eval_samples_per_second": 232.462, "eval_steps_per_second": 29.131, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.8013863563537598, "learning_rate": 3.2500000000000004e-05, "loss": 0.3085, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8259397035145188, "eval_loss": 0.3317520022392273, "eval_precision": 0.844799331103679, "eval_recall": 0.8124659028914347, "eval_runtime": 1.712, "eval_samples_per_second": 233.056, "eval_steps_per_second": 29.205, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.293257236480713, "learning_rate": 3e-05, "loss": 0.2981, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8237705942648565, "eval_loss": 0.3428766429424286, "eval_precision": 0.8722222222222222, "eval_recall": 0.7985088197854155, "eval_runtime": 1.712, "eval_samples_per_second": 233.064, "eval_steps_per_second": 29.206, "step": 976 }, { "epoch": 9.0, "grad_norm": 9.226790428161621, "learning_rate": 2.7500000000000004e-05, "loss": 0.2788, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8439374185136896, "eval_loss": 0.33035776019096375, "eval_precision": 0.8794955044955045, "eval_recall": 0.822376795781051, "eval_runtime": 1.7106, "eval_samples_per_second": 233.253, "eval_steps_per_second": 29.23, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.4179863929748535, "learning_rate": 2.5e-05, "loss": 0.259, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.30757635831832886, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 1.7123, "eval_samples_per_second": 233.024, "eval_steps_per_second": 29.201, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.00750470161438, "learning_rate": 2.25e-05, "loss": 0.2587, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8503151260504201, "eval_loss": 0.3025457561016083, "eval_precision": 0.8470628455912955, "eval_recall": 0.8538370612838698, "eval_runtime": 1.7097, "eval_samples_per_second": 233.369, "eval_steps_per_second": 29.244, "step": 1342 }, { "epoch": 12.0, "grad_norm": 4.474819660186768, "learning_rate": 2e-05, "loss": 0.2391, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8564658408408408, "eval_loss": 0.2989969253540039, "eval_precision": 0.8697278911564625, "eval_recall": 0.8459265320967448, "eval_runtime": 1.7113, "eval_samples_per_second": 233.161, "eval_steps_per_second": 29.218, "step": 1464 }, { "epoch": 13.0, "grad_norm": 5.184099197387695, "learning_rate": 1.75e-05, "loss": 0.2443, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8518472677764712, "eval_loss": 0.2918570339679718, "eval_precision": 0.8599810186649794, "eval_recall": 0.844880887434079, "eval_runtime": 1.7186, "eval_samples_per_second": 232.167, "eval_steps_per_second": 29.094, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.611837863922119, "learning_rate": 1.5e-05, "loss": 0.237, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8550328105883662, "eval_loss": 0.30395177006721497, "eval_precision": 0.8482905982905984, "eval_recall": 0.8631114748136025, "eval_runtime": 1.7146, "eval_samples_per_second": 232.704, "eval_steps_per_second": 29.161, "step": 1708 }, { "epoch": 15.0, "grad_norm": 5.530145168304443, "learning_rate": 1.25e-05, "loss": 0.2176, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8682773109243698, "eval_loss": 0.2936829626560211, "eval_precision": 0.864855223259409, "eval_recall": 0.8719767230405528, "eval_runtime": 1.7134, "eval_samples_per_second": 232.875, "eval_steps_per_second": 29.182, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.9305107593536377, "learning_rate": 1e-05, "loss": 0.2202, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.29200199246406555, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 1.7117, "eval_samples_per_second": 233.104, "eval_steps_per_second": 29.211, "step": 1952 }, { "epoch": 17.0, "grad_norm": 5.218233585357666, "learning_rate": 7.5e-06, "loss": 0.2203, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8575487105473052, "eval_loss": 0.29227131605148315, "eval_precision": 0.8584592421103936, "eval_recall": 0.8566557555919259, "eval_runtime": 1.7107, "eval_samples_per_second": 233.232, "eval_steps_per_second": 29.227, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.9121828079223633, "learning_rate": 5e-06, "loss": 0.2204, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8602260265626904, "eval_loss": 0.2926991283893585, "eval_precision": 0.8620943049601959, "eval_recall": 0.8584288052373159, "eval_runtime": 1.7115, "eval_samples_per_second": 233.126, "eval_steps_per_second": 29.214, "step": 2196 }, { "epoch": 19.0, "grad_norm": 9.12126636505127, "learning_rate": 2.5e-06, "loss": 0.2124, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.29202741384506226, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.711, "eval_samples_per_second": 233.194, "eval_steps_per_second": 29.222, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.568925142288208, "learning_rate": 0.0, "loss": 0.2108, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8587719298245614, "eval_loss": 0.29165780544281006, "eval_precision": 0.864771021021021, "eval_recall": 0.8534278959810875, "eval_runtime": 1.7116, "eval_samples_per_second": 233.115, "eval_steps_per_second": 29.212, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7677008691480000.0, "train_loss": 0.29650945194431994, "train_runtime": 617.5878, "train_samples_per_second": 118.04, "train_steps_per_second": 3.951 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7677008691480000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }