|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.639183521270752, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5413, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6445578231292517, |
|
"eval_loss": 0.5006802678108215, |
|
"eval_precision": 0.6593400801180687, |
|
"eval_recall": 0.6374340789234406, |
|
"eval_runtime": 1.8003, |
|
"eval_samples_per_second": 221.628, |
|
"eval_steps_per_second": 27.773, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.4983344078063965, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4584, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7760942760942761, |
|
"eval_loss": 0.3855762183666229, |
|
"eval_precision": 0.8122789566755084, |
|
"eval_recall": 0.7569103473358793, |
|
"eval_runtime": 1.8373, |
|
"eval_samples_per_second": 217.166, |
|
"eval_steps_per_second": 27.214, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 5.97755765914917, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3559, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8079089841803424, |
|
"eval_loss": 0.3407064378261566, |
|
"eval_precision": 0.8638322884012539, |
|
"eval_recall": 0.7814148026913984, |
|
"eval_runtime": 1.8397, |
|
"eval_samples_per_second": 216.877, |
|
"eval_steps_per_second": 27.178, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.0363476276397705, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2961, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8419946387230413, |
|
"eval_loss": 0.3088829517364502, |
|
"eval_precision": 0.8437691365584814, |
|
"eval_recall": 0.8402891434806329, |
|
"eval_runtime": 1.8466, |
|
"eval_samples_per_second": 216.069, |
|
"eval_steps_per_second": 27.076, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.3318538665771484, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.276, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8365204824303285, |
|
"eval_loss": 0.29173794388771057, |
|
"eval_precision": 0.8313636363636363, |
|
"eval_recall": 0.8424713584288053, |
|
"eval_runtime": 1.8414, |
|
"eval_samples_per_second": 216.677, |
|
"eval_steps_per_second": 27.153, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3067033290863037, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2555, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.842789598108747, |
|
"eval_loss": 0.29054704308509827, |
|
"eval_precision": 0.842789598108747, |
|
"eval_recall": 0.842789598108747, |
|
"eval_runtime": 1.847, |
|
"eval_samples_per_second": 216.025, |
|
"eval_steps_per_second": 27.071, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.5437530279159546, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2427, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.84402249790578, |
|
"eval_loss": 0.30313801765441895, |
|
"eval_precision": 0.8669909824394875, |
|
"eval_recall": 0.8281051100200036, |
|
"eval_runtime": 1.8479, |
|
"eval_samples_per_second": 215.922, |
|
"eval_steps_per_second": 27.058, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.033709764480591, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2219, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8522278069611882, |
|
"eval_loss": 0.2907596528530121, |
|
"eval_precision": 0.8513631702756499, |
|
"eval_recall": 0.8531096563011457, |
|
"eval_runtime": 1.8468, |
|
"eval_samples_per_second": 216.053, |
|
"eval_steps_per_second": 27.074, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.670888900756836, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2158, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8539996181748759, |
|
"eval_loss": 0.3083769381046295, |
|
"eval_precision": 0.8759595959595959, |
|
"eval_recall": 0.8384251682124022, |
|
"eval_runtime": 1.856, |
|
"eval_samples_per_second": 214.979, |
|
"eval_steps_per_second": 26.94, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.6058239936828613, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8517301860990547, |
|
"eval_loss": 0.29381993412971497, |
|
"eval_precision": 0.8456788321167883, |
|
"eval_recall": 0.8588379705400981, |
|
"eval_runtime": 1.8468, |
|
"eval_samples_per_second": 216.045, |
|
"eval_steps_per_second": 27.073, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.181007146835327, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1885, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8514869535493182, |
|
"eval_loss": 0.2976568639278412, |
|
"eval_precision": 0.8523821128305106, |
|
"eval_recall": 0.8506092016730314, |
|
"eval_runtime": 1.8451, |
|
"eval_samples_per_second": 216.253, |
|
"eval_steps_per_second": 27.099, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.1322133541107178, |
|
"learning_rate": 2e-05, |
|
"loss": 0.183, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8556621579112929, |
|
"eval_loss": 0.30698344111442566, |
|
"eval_precision": 0.871654421411703, |
|
"eval_recall": 0.8434260774686306, |
|
"eval_runtime": 1.8533, |
|
"eval_samples_per_second": 215.295, |
|
"eval_steps_per_second": 26.979, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.6321003437042236, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1752, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8569892473118279, |
|
"eval_loss": 0.29585033655166626, |
|
"eval_precision": 0.8522004241781549, |
|
"eval_recall": 0.8623840698308783, |
|
"eval_runtime": 1.846, |
|
"eval_samples_per_second": 216.146, |
|
"eval_steps_per_second": 27.086, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.546229600906372, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1558, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8530841286673736, |
|
"eval_loss": 0.3040328025817871, |
|
"eval_precision": 0.8446597760551249, |
|
"eval_recall": 0.8638388797963266, |
|
"eval_runtime": 1.8508, |
|
"eval_samples_per_second": 215.586, |
|
"eval_steps_per_second": 27.016, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.5455009937286377, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1538, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8484099018899409, |
|
"eval_loss": 0.30823931097984314, |
|
"eval_precision": 0.8430645161290322, |
|
"eval_recall": 0.8545644662665939, |
|
"eval_runtime": 1.848, |
|
"eval_samples_per_second": 215.904, |
|
"eval_steps_per_second": 27.056, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.4319658279418945, |
|
"learning_rate": 1e-05, |
|
"loss": 0.152, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8483536940081443, |
|
"eval_loss": 0.30997762084007263, |
|
"eval_precision": 0.8575792287132493, |
|
"eval_recall": 0.8406073831605747, |
|
"eval_runtime": 1.8455, |
|
"eval_samples_per_second": 216.202, |
|
"eval_steps_per_second": 27.093, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.420119285583496, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1436, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8510304659498208, |
|
"eval_loss": 0.31050172448158264, |
|
"eval_precision": 0.8463237893248498, |
|
"eval_recall": 0.8563375159119839, |
|
"eval_runtime": 1.8478, |
|
"eval_samples_per_second": 215.931, |
|
"eval_steps_per_second": 27.059, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.7008257508277893, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1426, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8503151260504201, |
|
"eval_loss": 0.3118613362312317, |
|
"eval_precision": 0.8470628455912955, |
|
"eval_recall": 0.8538370612838698, |
|
"eval_runtime": 1.8481, |
|
"eval_samples_per_second": 215.903, |
|
"eval_steps_per_second": 27.056, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.8680572509765625, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1398, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8569892473118279, |
|
"eval_loss": 0.316354900598526, |
|
"eval_precision": 0.8522004241781549, |
|
"eval_recall": 0.8623840698308783, |
|
"eval_runtime": 1.8465, |
|
"eval_samples_per_second": 216.08, |
|
"eval_steps_per_second": 27.078, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.442784547805786, |
|
"learning_rate": 0.0, |
|
"loss": 0.14, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8563025210084034, |
|
"eval_loss": 0.31680676341056824, |
|
"eval_precision": 0.8529936381473334, |
|
"eval_recall": 0.8598836152027641, |
|
"eval_runtime": 1.8511, |
|
"eval_samples_per_second": 215.549, |
|
"eval_steps_per_second": 27.011, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8460375995160000.0, |
|
"train_loss": 0.2318932650519199, |
|
"train_runtime": 643.2373, |
|
"train_samples_per_second": 113.333, |
|
"train_steps_per_second": 3.793 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8460375995160000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|