|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.0131800174713135, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5623, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7167919799498746, |
|
"eval_f1": 0.5794790005316321, |
|
"eval_loss": 0.5053456425666809, |
|
"eval_precision": 0.6409822866344606, |
|
"eval_recall": 0.5796053827968721, |
|
"eval_runtime": 5.6071, |
|
"eval_samples_per_second": 71.159, |
|
"eval_steps_per_second": 8.917, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.634490966796875, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.518, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.599784530797236, |
|
"eval_loss": 0.4860531687736511, |
|
"eval_precision": 0.6673625792811839, |
|
"eval_recall": 0.5959719949081652, |
|
"eval_runtime": 5.7755, |
|
"eval_samples_per_second": 69.085, |
|
"eval_steps_per_second": 8.657, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.959808111190796, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4835, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7694235588972431, |
|
"eval_f1": 0.7145034843205575, |
|
"eval_loss": 0.45518842339515686, |
|
"eval_precision": 0.7210824478299833, |
|
"eval_recall": 0.7093562465902892, |
|
"eval_runtime": 5.2584, |
|
"eval_samples_per_second": 75.878, |
|
"eval_steps_per_second": 9.509, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.635169506072998, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4497, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7520912893253319, |
|
"eval_loss": 0.4223441481590271, |
|
"eval_precision": 0.7520912893253319, |
|
"eval_recall": 0.7520912893253319, |
|
"eval_runtime": 5.0487, |
|
"eval_samples_per_second": 79.03, |
|
"eval_steps_per_second": 9.903, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 8.219679832458496, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4266, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.7740779522978476, |
|
"eval_loss": 0.399569034576416, |
|
"eval_precision": 0.7814051164566629, |
|
"eval_recall": 0.7680487361338425, |
|
"eval_runtime": 5.0767, |
|
"eval_samples_per_second": 78.595, |
|
"eval_steps_per_second": 9.849, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.150725841522217, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3907, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.784453781512605, |
|
"eval_loss": 0.3830115497112274, |
|
"eval_precision": 0.7818241274748796, |
|
"eval_recall": 0.787324968176032, |
|
"eval_runtime": 5.0718, |
|
"eval_samples_per_second": 78.67, |
|
"eval_steps_per_second": 9.858, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.297985076904297, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3742, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.798423147581139, |
|
"eval_loss": 0.3684135675430298, |
|
"eval_precision": 0.8016430472182685, |
|
"eval_recall": 0.7954628114202582, |
|
"eval_runtime": 5.0743, |
|
"eval_samples_per_second": 78.632, |
|
"eval_steps_per_second": 9.854, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 8.395323753356934, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3616, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7967966933608887, |
|
"eval_loss": 0.3719731867313385, |
|
"eval_precision": 0.7902444649446494, |
|
"eval_recall": 0.8051463902527732, |
|
"eval_runtime": 5.0484, |
|
"eval_samples_per_second": 79.035, |
|
"eval_steps_per_second": 9.904, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.748974561691284, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3294, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8076965854743632, |
|
"eval_loss": 0.36888691782951355, |
|
"eval_precision": 0.8018925518925519, |
|
"eval_recall": 0.8147390434624477, |
|
"eval_runtime": 5.0543, |
|
"eval_samples_per_second": 78.943, |
|
"eval_steps_per_second": 9.893, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 9.309541702270508, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3207, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8110907261644079, |
|
"eval_loss": 0.36315786838531494, |
|
"eval_precision": 0.8046983557202408, |
|
"eval_recall": 0.819012547735952, |
|
"eval_runtime": 5.0709, |
|
"eval_samples_per_second": 78.684, |
|
"eval_steps_per_second": 9.86, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.2568168640136719, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3214, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8085765951950401, |
|
"eval_loss": 0.3577338457107544, |
|
"eval_precision": 0.8017470018450185, |
|
"eval_recall": 0.817239498090562, |
|
"eval_runtime": 5.1071, |
|
"eval_samples_per_second": 78.126, |
|
"eval_steps_per_second": 9.79, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.915198802947998, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3167, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8119476846942383, |
|
"eval_loss": 0.36069995164871216, |
|
"eval_precision": 0.8045650301464256, |
|
"eval_recall": 0.8215130023640662, |
|
"eval_runtime": 5.0598, |
|
"eval_samples_per_second": 78.857, |
|
"eval_steps_per_second": 9.882, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.9545631408691406, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.289, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8060710498409331, |
|
"eval_loss": 0.3684280812740326, |
|
"eval_precision": 0.7988372093023256, |
|
"eval_recall": 0.8154664484451719, |
|
"eval_runtime": 5.1019, |
|
"eval_samples_per_second": 78.206, |
|
"eval_steps_per_second": 9.8, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.748187065124512, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2997, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8176861216035092, |
|
"eval_loss": 0.3479882776737213, |
|
"eval_precision": 0.8193355786895284, |
|
"eval_recall": 0.8161029278050556, |
|
"eval_runtime": 5.0557, |
|
"eval_samples_per_second": 78.92, |
|
"eval_steps_per_second": 9.89, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.010083198547363, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2986, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.821236559139785, |
|
"eval_loss": 0.35758015513420105, |
|
"eval_precision": 0.8169406150583245, |
|
"eval_recall": 0.8261047463175123, |
|
"eval_runtime": 5.0955, |
|
"eval_samples_per_second": 78.304, |
|
"eval_steps_per_second": 9.813, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.9220337271690369, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2914, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8195005730140539, |
|
"eval_loss": 0.34965991973876953, |
|
"eval_precision": 0.8179621848739496, |
|
"eval_recall": 0.8211038370612839, |
|
"eval_runtime": 5.0617, |
|
"eval_samples_per_second": 78.827, |
|
"eval_steps_per_second": 9.878, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.7026562690734863, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.278, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8229427559286084, |
|
"eval_loss": 0.3539772927761078, |
|
"eval_precision": 0.8206541218637993, |
|
"eval_recall": 0.8253773413347881, |
|
"eval_runtime": 5.1199, |
|
"eval_samples_per_second": 77.931, |
|
"eval_steps_per_second": 9.766, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 5.839470863342285, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2887, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8229427559286084, |
|
"eval_loss": 0.35161107778549194, |
|
"eval_precision": 0.8206541218637993, |
|
"eval_recall": 0.8253773413347881, |
|
"eval_runtime": 5.1154, |
|
"eval_samples_per_second": 77.999, |
|
"eval_steps_per_second": 9.774, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 7.782900810241699, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2829, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8229427559286084, |
|
"eval_loss": 0.35369938611984253, |
|
"eval_precision": 0.8206541218637993, |
|
"eval_recall": 0.8253773413347881, |
|
"eval_runtime": 5.0565, |
|
"eval_samples_per_second": 78.908, |
|
"eval_steps_per_second": 9.888, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.059621334075928, |
|
"learning_rate": 0.0, |
|
"loss": 0.2771, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8263655462184873, |
|
"eval_loss": 0.35401326417922974, |
|
"eval_precision": 0.8233396753671443, |
|
"eval_recall": 0.8296508456082925, |
|
"eval_runtime": 5.0854, |
|
"eval_samples_per_second": 78.459, |
|
"eval_steps_per_second": 9.832, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.35800845193081215, |
|
"train_runtime": 2113.1391, |
|
"train_samples_per_second": 34.432, |
|
"train_steps_per_second": 1.155 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|