{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 2440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 4.92190408706665,
      "learning_rate": 4.75e-05,
      "loss": 0.5593,
      "step": 122
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.731829573934837,
      "eval_f1": 0.6531872202500386,
      "eval_loss": 0.5129385590553284,
      "eval_precision": 0.6696820563659538,
      "eval_recall": 0.6452536824877251,
      "eval_runtime": 1.7112,
      "eval_samples_per_second": 233.172,
      "eval_steps_per_second": 29.22,
      "step": 122
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.805196285247803,
      "learning_rate": 4.5e-05,
      "loss": 0.481,
      "step": 244
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7343358395989975,
      "eval_f1": 0.7054243048977545,
      "eval_loss": 0.4831171929836273,
      "eval_precision": 0.6993341053850608,
      "eval_recall": 0.7295417348608838,
      "eval_runtime": 1.7107,
      "eval_samples_per_second": 233.24,
      "eval_steps_per_second": 29.228,
      "step": 244
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.332752227783203,
      "learning_rate": 4.25e-05,
      "loss": 0.4234,
      "step": 366
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8220551378446115,
      "eval_f1": 0.7739917826798037,
      "eval_loss": 0.3973781168460846,
      "eval_precision": 0.7925961082107262,
      "eval_recall": 0.7615930169121659,
      "eval_runtime": 1.7083,
      "eval_samples_per_second": 233.57,
      "eval_steps_per_second": 29.269,
      "step": 366
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.6373705863952637,
      "learning_rate": 4e-05,
      "loss": 0.3701,
      "step": 488
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8395989974937343,
      "eval_f1": 0.7991821327461466,
      "eval_loss": 0.3780345618724823,
      "eval_precision": 0.8127623983206507,
      "eval_recall": 0.7890070921985816,
      "eval_runtime": 1.708,
      "eval_samples_per_second": 233.603,
      "eval_steps_per_second": 29.274,
      "step": 488
    },
    {
      "epoch": 5.0,
      "grad_norm": 8.633732795715332,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.3499,
      "step": 610
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8471177944862155,
      "eval_f1": 0.819530641752864,
      "eval_loss": 0.3612293004989624,
      "eval_precision": 0.8134920634920635,
      "eval_recall": 0.8268321513002364,
      "eval_runtime": 1.706,
      "eval_samples_per_second": 233.875,
      "eval_steps_per_second": 29.308,
      "step": 610
    },
    {
      "epoch": 6.0,
      "grad_norm": 2.133713960647583,
      "learning_rate": 3.5e-05,
      "loss": 0.3165,
      "step": 732
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8270676691729323,
      "eval_f1": 0.8072288313015957,
      "eval_loss": 0.3760314881801605,
      "eval_precision": 0.7952690166975882,
      "eval_recall": 0.8376523004182579,
      "eval_runtime": 1.7076,
      "eval_samples_per_second": 233.655,
      "eval_steps_per_second": 29.28,
      "step": 732
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.4740520715713501,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.2968,
      "step": 854
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8696741854636592,
      "eval_f1": 0.8419946387230413,
      "eval_loss": 0.33418920636177063,
      "eval_precision": 0.8437691365584814,
      "eval_recall": 0.8402891434806329,
      "eval_runtime": 1.708,
      "eval_samples_per_second": 233.608,
      "eval_steps_per_second": 29.274,
      "step": 854
    },
    {
      "epoch": 8.0,
      "grad_norm": 5.327579021453857,
      "learning_rate": 3e-05,
      "loss": 0.2812,
      "step": 976
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8671679197994987,
      "eval_f1": 0.8350789627607721,
      "eval_loss": 0.3310687243938446,
      "eval_precision": 0.8463358876939919,
      "eval_recall": 0.8260138206946717,
      "eval_runtime": 1.707,
      "eval_samples_per_second": 233.739,
      "eval_steps_per_second": 29.291,
      "step": 976
    },
    {
      "epoch": 9.0,
      "grad_norm": 8.512028694152832,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.2682,
      "step": 1098
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8721804511278195,
      "eval_f1": 0.8454251965513313,
      "eval_loss": 0.32693037390708923,
      "eval_precision": 0.8463049835506276,
      "eval_recall": 0.8445626477541371,
      "eval_runtime": 1.7093,
      "eval_samples_per_second": 233.423,
      "eval_steps_per_second": 29.251,
      "step": 1098
    },
    {
      "epoch": 10.0,
      "grad_norm": 5.219644546508789,
      "learning_rate": 2.5e-05,
      "loss": 0.2596,
      "step": 1220
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8796992481203008,
      "eval_f1": 0.8541488972828073,
      "eval_loss": 0.3144831955432892,
      "eval_precision": 0.8559859154929578,
      "eval_recall": 0.8523822513184216,
      "eval_runtime": 1.7104,
      "eval_samples_per_second": 233.278,
      "eval_steps_per_second": 29.233,
      "step": 1220
    },
    {
      "epoch": 11.0,
      "grad_norm": 6.501763820648193,
      "learning_rate": 2.25e-05,
      "loss": 0.2464,
      "step": 1342
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8696741854636592,
      "eval_f1": 0.8377439939939939,
      "eval_loss": 0.31378793716430664,
      "eval_precision": 0.8503401360544218,
      "eval_recall": 0.8277868703400618,
      "eval_runtime": 1.7091,
      "eval_samples_per_second": 233.452,
      "eval_steps_per_second": 29.255,
      "step": 1342
    },
    {
      "epoch": 12.0,
      "grad_norm": 12.380316734313965,
      "learning_rate": 2e-05,
      "loss": 0.2415,
      "step": 1464
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8847117794486216,
      "eval_f1": 0.8564658408408408,
      "eval_loss": 0.31256482005119324,
      "eval_precision": 0.8697278911564625,
      "eval_recall": 0.8459265320967448,
      "eval_runtime": 1.71,
      "eval_samples_per_second": 233.331,
      "eval_steps_per_second": 29.239,
      "step": 1464
    },
    {
      "epoch": 13.0,
      "grad_norm": 4.9941325187683105,
      "learning_rate": 1.75e-05,
      "loss": 0.2354,
      "step": 1586
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8822055137844611,
      "eval_f1": 0.8521068445832446,
      "eval_loss": 0.3136414587497711,
      "eval_precision": 0.8693800752624282,
      "eval_recall": 0.8391525731951264,
      "eval_runtime": 1.7127,
      "eval_samples_per_second": 232.961,
      "eval_steps_per_second": 29.193,
      "step": 1586
    },
    {
      "epoch": 14.0,
      "grad_norm": 12.018842697143555,
      "learning_rate": 1.5e-05,
      "loss": 0.2303,
      "step": 1708
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.87468671679198,
      "eval_f1": 0.8510304659498208,
      "eval_loss": 0.3171806037425995,
      "eval_precision": 0.8463237893248498,
      "eval_recall": 0.8563375159119839,
      "eval_runtime": 1.711,
      "eval_samples_per_second": 233.193,
      "eval_steps_per_second": 29.222,
      "step": 1708
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.3128843307495117,
      "learning_rate": 1.25e-05,
      "loss": 0.2172,
      "step": 1830
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8822055137844611,
      "eval_f1": 0.8537492688633261,
      "eval_loss": 0.3120233416557312,
      "eval_precision": 0.8656062850151329,
      "eval_recall": 0.8441534824513548,
      "eval_runtime": 1.7092,
      "eval_samples_per_second": 233.436,
      "eval_steps_per_second": 29.253,
      "step": 1830
    },
    {
      "epoch": 16.0,
      "grad_norm": 5.912248611450195,
      "learning_rate": 1e-05,
      "loss": 0.2159,
      "step": 1952
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8621553884711779,
      "eval_f1": 0.8357422474382676,
      "eval_loss": 0.31162190437316895,
      "eval_precision": 0.8319228265372551,
      "eval_recall": 0.8399709038006911,
      "eval_runtime": 1.7092,
      "eval_samples_per_second": 233.439,
      "eval_steps_per_second": 29.253,
      "step": 1952
    },
    {
      "epoch": 17.0,
      "grad_norm": 8.530476570129395,
      "learning_rate": 7.5e-06,
      "loss": 0.2192,
      "step": 2074
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8847117794486216,
      "eval_f1": 0.8556621579112929,
      "eval_loss": 0.31227871775627136,
      "eval_precision": 0.871654421411703,
      "eval_recall": 0.8434260774686306,
      "eval_runtime": 1.7098,
      "eval_samples_per_second": 233.357,
      "eval_steps_per_second": 29.243,
      "step": 2074
    },
    {
      "epoch": 18.0,
      "grad_norm": 7.491436958312988,
      "learning_rate": 5e-06,
      "loss": 0.2124,
      "step": 2196
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8646616541353384,
      "eval_f1": 0.839868600986979,
      "eval_loss": 0.31498095393180847,
      "eval_precision": 0.834029197080292,
      "eval_recall": 0.8467448627023095,
      "eval_runtime": 1.7079,
      "eval_samples_per_second": 233.62,
      "eval_steps_per_second": 29.276,
      "step": 2196
    },
    {
      "epoch": 19.0,
      "grad_norm": 5.7076263427734375,
      "learning_rate": 2.5e-06,
      "loss": 0.2077,
      "step": 2318
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8796992481203008,
      "eval_f1": 0.8526315789473684,
      "eval_loss": 0.3084200918674469,
      "eval_precision": 0.8585304054054055,
      "eval_recall": 0.8473813420621932,
      "eval_runtime": 1.7079,
      "eval_samples_per_second": 233.62,
      "eval_steps_per_second": 29.276,
      "step": 2318
    },
    {
      "epoch": 20.0,
      "grad_norm": 6.0706095695495605,
      "learning_rate": 0.0,
      "loss": 0.205,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8796992481203008,
      "eval_f1": 0.8533986527862829,
      "eval_loss": 0.3077850043773651,
      "eval_precision": 0.8572003218020917,
      "eval_recall": 0.8498817966903074,
      "eval_runtime": 1.7073,
      "eval_samples_per_second": 233.696,
      "eval_steps_per_second": 29.285,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "step": 2440,
      "total_flos": 7662265464912000.0,
      "train_loss": 0.2918538148285913,
      "train_runtime": 616.0393,
      "train_samples_per_second": 118.109,
      "train_steps_per_second": 3.961
    }
  ],
  "logging_steps": 500,
  "max_steps": 2440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 7662265464912000.0,
  "train_batch_size": 30,
  "trial_name": null,
  "trial_params": null
}