|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.9823102951049805, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5456, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7468671679197995, |
|
"eval_f1": 0.6641666041651041, |
|
"eval_loss": 0.49161723256111145, |
|
"eval_precision": 0.6912698412698413, |
|
"eval_recall": 0.6533915257319513, |
|
"eval_runtime": 1.7896, |
|
"eval_samples_per_second": 222.952, |
|
"eval_steps_per_second": 27.939, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.931857109069824, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4369, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8120300751879699, |
|
"eval_f1": 0.7912087912087912, |
|
"eval_loss": 0.41080209612846375, |
|
"eval_precision": 0.7802706552706553, |
|
"eval_recall": 0.822013093289689, |
|
"eval_runtime": 1.8221, |
|
"eval_samples_per_second": 218.982, |
|
"eval_steps_per_second": 27.441, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.00280499458313, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3316, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.81524926686217, |
|
"eval_loss": 0.3294452726840973, |
|
"eval_precision": 0.8462682339611953, |
|
"eval_recall": 0.7964175304600837, |
|
"eval_runtime": 1.7904, |
|
"eval_samples_per_second": 222.854, |
|
"eval_steps_per_second": 27.927, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.0830881595611572, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2909, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8499551039516197, |
|
"eval_loss": 0.30187976360321045, |
|
"eval_precision": 0.8547473867595818, |
|
"eval_recall": 0.8456082924168031, |
|
"eval_runtime": 1.8, |
|
"eval_samples_per_second": 221.672, |
|
"eval_steps_per_second": 27.778, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.5169291496276855, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2584, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.842789598108747, |
|
"eval_loss": 0.3023463487625122, |
|
"eval_precision": 0.842789598108747, |
|
"eval_recall": 0.842789598108747, |
|
"eval_runtime": 1.7982, |
|
"eval_samples_per_second": 221.888, |
|
"eval_steps_per_second": 27.805, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.894607424736023, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.237, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8375505157126486, |
|
"eval_loss": 0.30203157663345337, |
|
"eval_precision": 0.8359243697478991, |
|
"eval_recall": 0.8392434988179669, |
|
"eval_runtime": 1.7962, |
|
"eval_samples_per_second": 222.138, |
|
"eval_steps_per_second": 27.837, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.33639559149742126, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2186, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8491157824491158, |
|
"eval_loss": 0.2988818287849426, |
|
"eval_precision": 0.8424908424908425, |
|
"eval_recall": 0.8570649208947081, |
|
"eval_runtime": 1.8005, |
|
"eval_samples_per_second": 221.607, |
|
"eval_steps_per_second": 27.77, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.405568242073059, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2108, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8614765038536611, |
|
"eval_loss": 0.29612118005752563, |
|
"eval_precision": 0.8686536646744258, |
|
"eval_recall": 0.8552009456264775, |
|
"eval_runtime": 1.8282, |
|
"eval_samples_per_second": 218.244, |
|
"eval_steps_per_second": 27.349, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.2904157638549805, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1898, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8480717680029244, |
|
"eval_loss": 0.30133265256881714, |
|
"eval_precision": 0.8498775260257195, |
|
"eval_recall": 0.8463356973995272, |
|
"eval_runtime": 1.7951, |
|
"eval_samples_per_second": 222.272, |
|
"eval_steps_per_second": 27.854, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.820188522338867, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1894, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8456742372671576, |
|
"eval_loss": 0.32312536239624023, |
|
"eval_precision": 0.8536697247706422, |
|
"eval_recall": 0.8388343335151845, |
|
"eval_runtime": 1.8085, |
|
"eval_samples_per_second": 220.63, |
|
"eval_steps_per_second": 27.648, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.6913771629333496, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1817, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8514869535493182, |
|
"eval_loss": 0.301199346780777, |
|
"eval_precision": 0.8523821128305106, |
|
"eval_recall": 0.8506092016730314, |
|
"eval_runtime": 1.8217, |
|
"eval_samples_per_second": 219.025, |
|
"eval_steps_per_second": 27.447, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.912967681884766, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1723, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8359175094431583, |
|
"eval_loss": 0.29794374108314514, |
|
"eval_precision": 0.8376607470912432, |
|
"eval_recall": 0.8342425895617385, |
|
"eval_runtime": 1.7955, |
|
"eval_samples_per_second": 222.218, |
|
"eval_steps_per_second": 27.847, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.4766283929347992, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1547, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.2937452793121338, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 1.8242, |
|
"eval_samples_per_second": 218.73, |
|
"eval_steps_per_second": 27.41, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.687190532684326, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1569, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8485547445255475, |
|
"eval_loss": 0.3064776360988617, |
|
"eval_precision": 0.8383458646616542, |
|
"eval_recall": 0.8627932351336607, |
|
"eval_runtime": 1.7921, |
|
"eval_samples_per_second": 222.641, |
|
"eval_steps_per_second": 27.9, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.828568458557129, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1442, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8529524583135901, |
|
"eval_loss": 0.2884095013141632, |
|
"eval_precision": 0.8504480286738352, |
|
"eval_recall": 0.8556101109292599, |
|
"eval_runtime": 1.8008, |
|
"eval_samples_per_second": 221.563, |
|
"eval_steps_per_second": 27.765, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.8095035552978516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1435, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8563025210084034, |
|
"eval_loss": 0.30156683921813965, |
|
"eval_precision": 0.8529936381473334, |
|
"eval_recall": 0.8598836152027641, |
|
"eval_runtime": 1.8479, |
|
"eval_samples_per_second": 215.919, |
|
"eval_steps_per_second": 27.058, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.34168025851249695, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1378, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8556962025316456, |
|
"eval_loss": 0.31144392490386963, |
|
"eval_precision": 0.8477571494464944, |
|
"eval_recall": 0.8656119294417166, |
|
"eval_runtime": 1.8294, |
|
"eval_samples_per_second": 218.106, |
|
"eval_steps_per_second": 27.332, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.2586296498775482, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1377, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8550328105883662, |
|
"eval_loss": 0.3096340000629425, |
|
"eval_precision": 0.8482905982905984, |
|
"eval_recall": 0.8631114748136025, |
|
"eval_runtime": 1.8332, |
|
"eval_samples_per_second": 217.658, |
|
"eval_steps_per_second": 27.275, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0663389191031456, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1307, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8556004584112431, |
|
"eval_loss": 0.30646833777427673, |
|
"eval_precision": 0.8538865546218487, |
|
"eval_recall": 0.85738316057465, |
|
"eval_runtime": 1.8307, |
|
"eval_samples_per_second": 217.954, |
|
"eval_steps_per_second": 27.313, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.8260014057159424, |
|
"learning_rate": 0.0, |
|
"loss": 0.126, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8656072933585827, |
|
"eval_loss": 0.30540019273757935, |
|
"eval_precision": 0.8614399005740664, |
|
"eval_recall": 0.8702036733951628, |
|
"eval_runtime": 1.8294, |
|
"eval_samples_per_second": 218.099, |
|
"eval_steps_per_second": 27.331, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.21972156196344095, |
|
"train_runtime": 635.9425, |
|
"train_samples_per_second": 114.413, |
|
"train_steps_per_second": 3.837 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|