|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.052820205688477, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5445, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7518796992481203, |
|
"eval_f1": 0.6708167704192605, |
|
"eval_loss": 0.48936620354652405, |
|
"eval_precision": 0.6988095238095238, |
|
"eval_recall": 0.6594380796508457, |
|
"eval_runtime": 5.1391, |
|
"eval_samples_per_second": 77.64, |
|
"eval_steps_per_second": 9.729, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.3890905380249023, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.432, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7954312407428505, |
|
"eval_loss": 0.36909204721450806, |
|
"eval_precision": 0.7842791202823785, |
|
"eval_recall": 0.8173304237134025, |
|
"eval_runtime": 5.0614, |
|
"eval_samples_per_second": 78.833, |
|
"eval_steps_per_second": 9.879, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.7538790702819824, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3342, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.7956919505700826, |
|
"eval_loss": 0.33010444045066833, |
|
"eval_precision": 0.85742518351214, |
|
"eval_recall": 0.7685942898708855, |
|
"eval_runtime": 5.0669, |
|
"eval_samples_per_second": 78.747, |
|
"eval_steps_per_second": 9.868, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.051776170730591, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2846, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8502252252252251, |
|
"eval_loss": 0.2885676324367523, |
|
"eval_precision": 0.863265306122449, |
|
"eval_recall": 0.8398799781778505, |
|
"eval_runtime": 5.0493, |
|
"eval_samples_per_second": 79.021, |
|
"eval_steps_per_second": 9.902, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.2031478881835938, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2621, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8488361520276414, |
|
"eval_loss": 0.27282822132110596, |
|
"eval_precision": 0.8488361520276414, |
|
"eval_recall": 0.8488361520276414, |
|
"eval_runtime": 5.0626, |
|
"eval_samples_per_second": 78.814, |
|
"eval_steps_per_second": 9.876, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.391174554824829, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2419, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8503151260504201, |
|
"eval_loss": 0.27526727318763733, |
|
"eval_precision": 0.8470628455912955, |
|
"eval_recall": 0.8538370612838698, |
|
"eval_runtime": 5.0607, |
|
"eval_samples_per_second": 78.843, |
|
"eval_steps_per_second": 9.88, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.6138697862625122, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2132, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8622899159663866, |
|
"eval_loss": 0.27531638741493225, |
|
"eval_precision": 0.8589244307033712, |
|
"eval_recall": 0.8659301691216585, |
|
"eval_runtime": 5.0703, |
|
"eval_samples_per_second": 78.693, |
|
"eval_steps_per_second": 9.861, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.8498824834823608, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2055, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8541488972828073, |
|
"eval_loss": 0.27910253405570984, |
|
"eval_precision": 0.8559859154929578, |
|
"eval_recall": 0.8523822513184216, |
|
"eval_runtime": 5.0606, |
|
"eval_samples_per_second": 78.844, |
|
"eval_steps_per_second": 9.88, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.801692485809326, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1903, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8306935047100303, |
|
"eval_loss": 0.3009396195411682, |
|
"eval_precision": 0.8372758729160114, |
|
"eval_recall": 0.8249681760320058, |
|
"eval_runtime": 5.0892, |
|
"eval_samples_per_second": 78.402, |
|
"eval_steps_per_second": 9.825, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.017825126647949, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1852, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8341632880321839, |
|
"eval_loss": 0.3085351884365082, |
|
"eval_precision": 0.8479139504563233, |
|
"eval_recall": 0.8235133660665576, |
|
"eval_runtime": 5.0614, |
|
"eval_samples_per_second": 78.832, |
|
"eval_steps_per_second": 9.879, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.8085110187530518, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1758, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8556004584112431, |
|
"eval_loss": 0.28515923023223877, |
|
"eval_precision": 0.8538865546218487, |
|
"eval_recall": 0.85738316057465, |
|
"eval_runtime": 5.053, |
|
"eval_samples_per_second": 78.964, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.22409725189209, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1617, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8642908431276217, |
|
"eval_loss": 0.30107688903808594, |
|
"eval_precision": 0.8633964654080464, |
|
"eval_recall": 0.8652027641389344, |
|
"eval_runtime": 5.0864, |
|
"eval_samples_per_second": 78.444, |
|
"eval_steps_per_second": 9.83, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.442287921905518, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1581, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8703223612108386, |
|
"eval_loss": 0.3050229847431183, |
|
"eval_precision": 0.8694131129742446, |
|
"eval_recall": 0.8712493180578287, |
|
"eval_runtime": 5.0713, |
|
"eval_samples_per_second": 78.678, |
|
"eval_steps_per_second": 9.859, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.8908846378326416, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.149, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8744925055412909, |
|
"eval_loss": 0.3143095374107361, |
|
"eval_precision": 0.8639342866870956, |
|
"eval_recall": 0.8887525004546282, |
|
"eval_runtime": 5.0958, |
|
"eval_samples_per_second": 78.299, |
|
"eval_steps_per_second": 9.812, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.6980299949645996, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1386, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8790689216221131, |
|
"eval_loss": 0.302778035402298, |
|
"eval_precision": 0.8790689216221131, |
|
"eval_recall": 0.8790689216221131, |
|
"eval_runtime": 5.0849, |
|
"eval_samples_per_second": 78.468, |
|
"eval_steps_per_second": 9.833, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.6725683212280273, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1465, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.3111669719219208, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 5.0592, |
|
"eval_samples_per_second": 78.866, |
|
"eval_steps_per_second": 9.883, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.7393283247947693, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1307, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8629480286738351, |
|
"eval_loss": 0.31975144147872925, |
|
"eval_precision": 0.8580770590314599, |
|
"eval_recall": 0.8684306237497728, |
|
"eval_runtime": 5.0777, |
|
"eval_samples_per_second": 78.579, |
|
"eval_steps_per_second": 9.847, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.382880836725235, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1231, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.32525473833084106, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 5.096, |
|
"eval_samples_per_second": 78.296, |
|
"eval_steps_per_second": 9.812, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.1525256484746933, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1344, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8541488972828073, |
|
"eval_loss": 0.3289809823036194, |
|
"eval_precision": 0.8559859154929578, |
|
"eval_recall": 0.8523822513184216, |
|
"eval_runtime": 5.0693, |
|
"eval_samples_per_second": 78.709, |
|
"eval_steps_per_second": 9.863, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.0254364013671875, |
|
"learning_rate": 0.0, |
|
"loss": 0.1229, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8589543987905864, |
|
"eval_loss": 0.32526540756225586, |
|
"eval_precision": 0.8564068100358423, |
|
"eval_recall": 0.8616566648481543, |
|
"eval_runtime": 5.0827, |
|
"eval_samples_per_second": 78.502, |
|
"eval_steps_per_second": 9.837, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.21670899273919278, |
|
"train_runtime": 1925.8785, |
|
"train_samples_per_second": 37.78, |
|
"train_steps_per_second": 1.267 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|