|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.9568660259246826, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5556, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.6899266862170088, |
|
"eval_loss": 0.5147875547409058, |
|
"eval_precision": 0.6851714708898257, |
|
"eval_recall": 0.6970358246953992, |
|
"eval_runtime": 1.7157, |
|
"eval_samples_per_second": 232.562, |
|
"eval_steps_per_second": 29.143, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.367062091827393, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.476, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7095448122296921, |
|
"eval_loss": 0.4507494866847992, |
|
"eval_precision": 0.7577399380804953, |
|
"eval_recall": 0.6917621385706492, |
|
"eval_runtime": 1.7126, |
|
"eval_samples_per_second": 232.973, |
|
"eval_steps_per_second": 29.195, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 8.358736991882324, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4238, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7644319076357912, |
|
"eval_loss": 0.4003293514251709, |
|
"eval_precision": 0.7957593330916999, |
|
"eval_recall": 0.7473176941262047, |
|
"eval_runtime": 1.7133, |
|
"eval_samples_per_second": 232.886, |
|
"eval_steps_per_second": 29.184, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.034170627593994, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3735, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.796615353247018, |
|
"eval_loss": 0.379879891872406, |
|
"eval_precision": 0.8088983050847458, |
|
"eval_recall": 0.7872340425531914, |
|
"eval_runtime": 1.7115, |
|
"eval_samples_per_second": 233.13, |
|
"eval_steps_per_second": 29.214, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.402078151702881, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3548, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8160386984618873, |
|
"eval_loss": 0.3634377121925354, |
|
"eval_precision": 0.8152632848784607, |
|
"eval_recall": 0.8168303327877796, |
|
"eval_runtime": 1.7124, |
|
"eval_samples_per_second": 233.013, |
|
"eval_steps_per_second": 29.2, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.384060382843018, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3213, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8136136136136136, |
|
"eval_loss": 0.35842451453208923, |
|
"eval_precision": 0.8076923076923077, |
|
"eval_recall": 0.820785597381342, |
|
"eval_runtime": 1.7164, |
|
"eval_samples_per_second": 232.462, |
|
"eval_steps_per_second": 29.131, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.8013863563537598, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3085, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8259397035145188, |
|
"eval_loss": 0.3317520022392273, |
|
"eval_precision": 0.844799331103679, |
|
"eval_recall": 0.8124659028914347, |
|
"eval_runtime": 1.712, |
|
"eval_samples_per_second": 233.056, |
|
"eval_steps_per_second": 29.205, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.293257236480713, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2981, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8237705942648565, |
|
"eval_loss": 0.3428766429424286, |
|
"eval_precision": 0.8722222222222222, |
|
"eval_recall": 0.7985088197854155, |
|
"eval_runtime": 1.712, |
|
"eval_samples_per_second": 233.064, |
|
"eval_steps_per_second": 29.206, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 9.226790428161621, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2788, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8439374185136896, |
|
"eval_loss": 0.33035776019096375, |
|
"eval_precision": 0.8794955044955045, |
|
"eval_recall": 0.822376795781051, |
|
"eval_runtime": 1.7106, |
|
"eval_samples_per_second": 233.253, |
|
"eval_steps_per_second": 29.23, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.4179863929748535, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.259, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.30757635831832886, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 1.7123, |
|
"eval_samples_per_second": 233.024, |
|
"eval_steps_per_second": 29.201, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.00750470161438, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2587, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8503151260504201, |
|
"eval_loss": 0.3025457561016083, |
|
"eval_precision": 0.8470628455912955, |
|
"eval_recall": 0.8538370612838698, |
|
"eval_runtime": 1.7097, |
|
"eval_samples_per_second": 233.369, |
|
"eval_steps_per_second": 29.244, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.474819660186768, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2391, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8564658408408408, |
|
"eval_loss": 0.2989969253540039, |
|
"eval_precision": 0.8697278911564625, |
|
"eval_recall": 0.8459265320967448, |
|
"eval_runtime": 1.7113, |
|
"eval_samples_per_second": 233.161, |
|
"eval_steps_per_second": 29.218, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 5.184099197387695, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2443, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8518472677764712, |
|
"eval_loss": 0.2918570339679718, |
|
"eval_precision": 0.8599810186649794, |
|
"eval_recall": 0.844880887434079, |
|
"eval_runtime": 1.7186, |
|
"eval_samples_per_second": 232.167, |
|
"eval_steps_per_second": 29.094, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.611837863922119, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.237, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8550328105883662, |
|
"eval_loss": 0.30395177006721497, |
|
"eval_precision": 0.8482905982905984, |
|
"eval_recall": 0.8631114748136025, |
|
"eval_runtime": 1.7146, |
|
"eval_samples_per_second": 232.704, |
|
"eval_steps_per_second": 29.161, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 5.530145168304443, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2176, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8682773109243698, |
|
"eval_loss": 0.2936829626560211, |
|
"eval_precision": 0.864855223259409, |
|
"eval_recall": 0.8719767230405528, |
|
"eval_runtime": 1.7134, |
|
"eval_samples_per_second": 232.875, |
|
"eval_steps_per_second": 29.182, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.9305107593536377, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2202, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8560793854229822, |
|
"eval_loss": 0.29200199246406555, |
|
"eval_precision": 0.8609538327526132, |
|
"eval_recall": 0.8516548463356974, |
|
"eval_runtime": 1.7117, |
|
"eval_samples_per_second": 233.104, |
|
"eval_steps_per_second": 29.211, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 5.218233585357666, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2203, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8575487105473052, |
|
"eval_loss": 0.29227131605148315, |
|
"eval_precision": 0.8584592421103936, |
|
"eval_recall": 0.8566557555919259, |
|
"eval_runtime": 1.7107, |
|
"eval_samples_per_second": 233.232, |
|
"eval_steps_per_second": 29.227, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.9121828079223633, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2204, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8602260265626904, |
|
"eval_loss": 0.2926991283893585, |
|
"eval_precision": 0.8620943049601959, |
|
"eval_recall": 0.8584288052373159, |
|
"eval_runtime": 1.7115, |
|
"eval_samples_per_second": 233.126, |
|
"eval_steps_per_second": 29.214, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 9.12126636505127, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2124, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.29202741384506226, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.711, |
|
"eval_samples_per_second": 233.194, |
|
"eval_steps_per_second": 29.222, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.568925142288208, |
|
"learning_rate": 0.0, |
|
"loss": 0.2108, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8587719298245614, |
|
"eval_loss": 0.29165780544281006, |
|
"eval_precision": 0.864771021021021, |
|
"eval_recall": 0.8534278959810875, |
|
"eval_runtime": 1.7116, |
|
"eval_samples_per_second": 233.115, |
|
"eval_steps_per_second": 29.212, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7677008691480000.0, |
|
"train_loss": 0.29650945194431994, |
|
"train_runtime": 617.5878, |
|
"train_samples_per_second": 118.04, |
|
"train_steps_per_second": 3.951 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7677008691480000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|