|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.094232082366943, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5608, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6526816032372154, |
|
"eval_loss": 0.5057180523872375, |
|
"eval_precision": 0.6593383311603651, |
|
"eval_recall": 0.6481633024186215, |
|
"eval_runtime": 5.071, |
|
"eval_samples_per_second": 78.683, |
|
"eval_steps_per_second": 9.86, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.818559408187866, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5012, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7518796992481203, |
|
"eval_f1": 0.7192637077573647, |
|
"eval_loss": 0.47916004061698914, |
|
"eval_precision": 0.7116519573339108, |
|
"eval_recall": 0.7369521731223858, |
|
"eval_runtime": 5.0875, |
|
"eval_samples_per_second": 78.427, |
|
"eval_steps_per_second": 9.828, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.010869026184082, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4628, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7694235588972431, |
|
"eval_f1": 0.7320583941605839, |
|
"eval_loss": 0.4281270503997803, |
|
"eval_precision": 0.7255639097744361, |
|
"eval_recall": 0.7418621567557737, |
|
"eval_runtime": 5.0471, |
|
"eval_samples_per_second": 79.056, |
|
"eval_steps_per_second": 9.907, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.6188292503356934, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4045, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.7764637262952703, |
|
"eval_loss": 0.39506053924560547, |
|
"eval_precision": 0.7802700348432056, |
|
"eval_recall": 0.7730496453900709, |
|
"eval_runtime": 5.0784, |
|
"eval_samples_per_second": 78.569, |
|
"eval_steps_per_second": 9.846, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.2819700241088867, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3701, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7994987468671679, |
|
"eval_f1": 0.7735784814436499, |
|
"eval_loss": 0.42390376329421997, |
|
"eval_precision": 0.7633219954648527, |
|
"eval_recall": 0.7956446626659393, |
|
"eval_runtime": 5.0599, |
|
"eval_samples_per_second": 78.856, |
|
"eval_steps_per_second": 9.882, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.4958856105804443, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3362, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7974014336917563, |
|
"eval_loss": 0.3720650374889374, |
|
"eval_precision": 0.7934340756451043, |
|
"eval_recall": 0.801918530641935, |
|
"eval_runtime": 5.0782, |
|
"eval_samples_per_second": 78.572, |
|
"eval_steps_per_second": 9.846, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.6802586317062378, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3285, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8077810218978102, |
|
"eval_loss": 0.372498095035553, |
|
"eval_precision": 0.7988721804511278, |
|
"eval_recall": 0.8204673577014002, |
|
"eval_runtime": 5.076, |
|
"eval_samples_per_second": 78.605, |
|
"eval_steps_per_second": 9.85, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.754953384399414, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3061, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8109388749746158, |
|
"eval_loss": 0.3537139594554901, |
|
"eval_precision": 0.8087365591397849, |
|
"eval_recall": 0.8132842334969994, |
|
"eval_runtime": 5.0678, |
|
"eval_samples_per_second": 78.732, |
|
"eval_steps_per_second": 9.866, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 19.56822395324707, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3017, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8109388749746158, |
|
"eval_loss": 0.3503880798816681, |
|
"eval_precision": 0.8087365591397849, |
|
"eval_recall": 0.8132842334969994, |
|
"eval_runtime": 5.0696, |
|
"eval_samples_per_second": 78.704, |
|
"eval_steps_per_second": 9.863, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 7.07460880279541, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2942, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8186033824331697, |
|
"eval_loss": 0.33907514810562134, |
|
"eval_precision": 0.8186033824331697, |
|
"eval_recall": 0.8186033824331697, |
|
"eval_runtime": 5.0634, |
|
"eval_samples_per_second": 78.8, |
|
"eval_steps_per_second": 9.875, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.607100486755371, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2715, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.821236559139785, |
|
"eval_loss": 0.34563252329826355, |
|
"eval_precision": 0.8169406150583245, |
|
"eval_recall": 0.8261047463175123, |
|
"eval_runtime": 5.0549, |
|
"eval_samples_per_second": 78.934, |
|
"eval_steps_per_second": 9.891, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.076610565185547, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2703, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8262464479462671, |
|
"eval_loss": 0.353447824716568, |
|
"eval_precision": 0.8190008071955719, |
|
"eval_recall": 0.8353791598472449, |
|
"eval_runtime": 5.1064, |
|
"eval_samples_per_second": 78.137, |
|
"eval_steps_per_second": 9.792, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.735930323600769, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2759, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8202661535994868, |
|
"eval_loss": 0.332623153924942, |
|
"eval_precision": 0.8228070175438597, |
|
"eval_recall": 0.8178759774504456, |
|
"eval_runtime": 5.0603, |
|
"eval_samples_per_second": 78.849, |
|
"eval_steps_per_second": 9.881, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 11.147435188293457, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2705, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8289446964056049, |
|
"eval_loss": 0.33598676323890686, |
|
"eval_precision": 0.8266129032258065, |
|
"eval_recall": 0.8314238952536825, |
|
"eval_runtime": 5.0525, |
|
"eval_samples_per_second": 78.971, |
|
"eval_steps_per_second": 9.896, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.3279173374176025, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2576, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.839868600986979, |
|
"eval_loss": 0.3422936797142029, |
|
"eval_precision": 0.834029197080292, |
|
"eval_recall": 0.8467448627023095, |
|
"eval_runtime": 5.0807, |
|
"eval_samples_per_second": 78.532, |
|
"eval_steps_per_second": 9.841, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.688598155975342, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2513, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8313646980313647, |
|
"eval_loss": 0.33944278955459595, |
|
"eval_precision": 0.825091575091575, |
|
"eval_recall": 0.8389252591380251, |
|
"eval_runtime": 5.0529, |
|
"eval_samples_per_second": 78.964, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 8.046936988830566, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2481, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8281017346283209, |
|
"eval_loss": 0.32614341378211975, |
|
"eval_precision": 0.8272965800108572, |
|
"eval_recall": 0.8289234406255683, |
|
"eval_runtime": 5.0441, |
|
"eval_samples_per_second": 79.102, |
|
"eval_steps_per_second": 9.913, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.366418361663818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2561, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8365204824303285, |
|
"eval_loss": 0.3320069909095764, |
|
"eval_precision": 0.8313636363636363, |
|
"eval_recall": 0.8424713584288053, |
|
"eval_runtime": 5.0899, |
|
"eval_samples_per_second": 78.39, |
|
"eval_steps_per_second": 9.823, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.5271737575531006, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2478, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8349466368826011, |
|
"eval_loss": 0.3268921673297882, |
|
"eval_precision": 0.8325716845878136, |
|
"eval_recall": 0.8374704491725768, |
|
"eval_runtime": 5.0459, |
|
"eval_samples_per_second": 79.074, |
|
"eval_steps_per_second": 9.909, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 10.424647331237793, |
|
"learning_rate": 0.0, |
|
"loss": 0.2451, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8357422474382676, |
|
"eval_loss": 0.32743164896965027, |
|
"eval_precision": 0.8319228265372551, |
|
"eval_recall": 0.8399709038006911, |
|
"eval_runtime": 5.0792, |
|
"eval_samples_per_second": 78.556, |
|
"eval_steps_per_second": 9.844, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.32301358082255377, |
|
"train_runtime": 1951.399, |
|
"train_samples_per_second": 37.286, |
|
"train_steps_per_second": 1.25 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|