{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 2440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 3.532687187194824,
      "learning_rate": 4.75e-05,
      "loss": 0.551,
      "step": 122
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7243107769423559,
      "eval_f1": 0.6144317942230656,
      "eval_loss": 0.5008909106254578,
      "eval_precision": 0.65566534914361,
      "eval_recall": 0.6074286233860702,
      "eval_runtime": 1.7719,
      "eval_samples_per_second": 225.176,
      "eval_steps_per_second": 28.218,
      "step": 122
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3572421073913574,
      "learning_rate": 4.5e-05,
      "loss": 0.4528,
      "step": 244
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7819548872180451,
      "eval_f1": 0.7578021978021978,
      "eval_loss": 0.4117695391178131,
      "eval_precision": 0.7486942070275404,
      "eval_recall": 0.7857337697763229,
      "eval_runtime": 1.781,
      "eval_samples_per_second": 224.038,
      "eval_steps_per_second": 28.075,
      "step": 244
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5434460639953613,
      "learning_rate": 4.25e-05,
      "loss": 0.3588,
      "step": 366
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8521303258145363,
      "eval_f1": 0.8063505120717311,
      "eval_loss": 0.3427870571613312,
      "eval_precision": 0.844033177433499,
      "eval_recall": 0.7853700672849608,
      "eval_runtime": 1.7815,
      "eval_samples_per_second": 223.975,
      "eval_steps_per_second": 28.067,
      "step": 366
    },
    {
      "epoch": 4.0,
      "grad_norm": 4.183424949645996,
      "learning_rate": 4e-05,
      "loss": 0.3192,
      "step": 488
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8546365914786967,
      "eval_f1": 0.8246499363520641,
      "eval_loss": 0.31169387698173523,
      "eval_precision": 0.8246499363520641,
      "eval_recall": 0.8246499363520641,
      "eval_runtime": 1.8089,
      "eval_samples_per_second": 220.578,
      "eval_steps_per_second": 27.641,
      "step": 488
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.7957929372787476,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.2714,
      "step": 610
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8671679197994987,
      "eval_f1": 0.8446181767415888,
      "eval_loss": 0.3037036657333374,
      "eval_precision": 0.835902201887332,
      "eval_recall": 0.8560192762320422,
      "eval_runtime": 1.8052,
      "eval_samples_per_second": 221.025,
      "eval_steps_per_second": 27.697,
      "step": 610
    },
    {
      "epoch": 6.0,
      "grad_norm": 3.427933692932129,
      "learning_rate": 3.5e-05,
      "loss": 0.257,
      "step": 732
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8771929824561403,
      "eval_f1": 0.8475258334958082,
      "eval_loss": 0.2832619249820709,
      "eval_precision": 0.8591828192414193,
      "eval_recall": 0.8381069285324605,
      "eval_runtime": 1.8047,
      "eval_samples_per_second": 221.086,
      "eval_steps_per_second": 27.705,
      "step": 732
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.5598583221435547,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.2405,
      "step": 854
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8847117794486216,
      "eval_f1": 0.8616171059774413,
      "eval_loss": 0.28606978058815,
      "eval_precision": 0.859873949579832,
      "eval_recall": 0.8634297144935443,
      "eval_runtime": 1.8066,
      "eval_samples_per_second": 220.859,
      "eval_steps_per_second": 27.677,
      "step": 854
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.1964571475982666,
      "learning_rate": 3e-05,
      "loss": 0.2163,
      "step": 976
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8796992481203008,
      "eval_f1": 0.8556004584112431,
      "eval_loss": 0.295376181602478,
      "eval_precision": 0.8538865546218487,
      "eval_recall": 0.85738316057465,
      "eval_runtime": 1.8106,
      "eval_samples_per_second": 220.364,
      "eval_steps_per_second": 27.614,
      "step": 976
    },
    {
      "epoch": 9.0,
      "grad_norm": 4.184772491455078,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.2135,
      "step": 1098
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.87468671679198,
      "eval_f1": 0.8472902633190447,
      "eval_loss": 0.29421547055244446,
      "eval_precision": 0.8510272912927781,
      "eval_recall": 0.8438352427714131,
      "eval_runtime": 1.8081,
      "eval_samples_per_second": 220.677,
      "eval_steps_per_second": 27.654,
      "step": 1098
    },
    {
      "epoch": 10.0,
      "grad_norm": 4.817326545715332,
      "learning_rate": 2.5e-05,
      "loss": 0.2001,
      "step": 1220
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8822055137844611,
      "eval_f1": 0.8537492688633261,
      "eval_loss": 0.3002479076385498,
      "eval_precision": 0.8656062850151329,
      "eval_recall": 0.8441534824513548,
      "eval_runtime": 1.8147,
      "eval_samples_per_second": 219.877,
      "eval_steps_per_second": 27.553,
      "step": 1220
    },
    {
      "epoch": 11.0,
      "grad_norm": 2.364151954650879,
      "learning_rate": 2.25e-05,
      "loss": 0.1825,
      "step": 1342
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8922305764411027,
      "eval_f1": 0.8676331036823873,
      "eval_loss": 0.30105578899383545,
      "eval_precision": 0.8749292230261088,
      "eval_recall": 0.8612474995453718,
      "eval_runtime": 1.7869,
      "eval_samples_per_second": 223.295,
      "eval_steps_per_second": 27.982,
      "step": 1342
    },
    {
      "epoch": 12.0,
      "grad_norm": 4.355996608734131,
      "learning_rate": 2e-05,
      "loss": 0.1765,
      "step": 1464
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8897243107769424,
      "eval_f1": 0.8656154317207594,
      "eval_loss": 0.28576698899269104,
      "eval_precision": 0.869546382820719,
      "eval_recall": 0.861974904528096,
      "eval_runtime": 1.7863,
      "eval_samples_per_second": 223.361,
      "eval_steps_per_second": 27.99,
      "step": 1464
    },
    {
      "epoch": 13.0,
      "grad_norm": 1.647818922996521,
      "learning_rate": 1.75e-05,
      "loss": 0.1674,
      "step": 1586
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8947368421052632,
      "eval_f1": 0.8748655913978494,
      "eval_loss": 0.29319024085998535,
      "eval_precision": 0.86983032873807,
      "eval_recall": 0.8805237315875614,
      "eval_runtime": 1.7923,
      "eval_samples_per_second": 222.613,
      "eval_steps_per_second": 27.896,
      "step": 1586
    },
    {
      "epoch": 14.0,
      "grad_norm": 2.9686295986175537,
      "learning_rate": 1.5e-05,
      "loss": 0.1597,
      "step": 1708
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8872180451127819,
      "eval_f1": 0.8668668668668669,
      "eval_loss": 0.2937151789665222,
      "eval_precision": 0.8598901098901099,
      "eval_recall": 0.8752045826513912,
      "eval_runtime": 1.8072,
      "eval_samples_per_second": 220.782,
      "eval_steps_per_second": 27.667,
      "step": 1708
    },
    {
      "epoch": 15.0,
      "grad_norm": 2.176563262939453,
      "learning_rate": 1.25e-05,
      "loss": 0.1564,
      "step": 1830
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8947368421052632,
      "eval_f1": 0.8717238211879976,
      "eval_loss": 0.2963174879550934,
      "eval_precision": 0.8757194133300328,
      "eval_recall": 0.8680214584469903,
      "eval_runtime": 1.8185,
      "eval_samples_per_second": 219.414,
      "eval_steps_per_second": 27.495,
      "step": 1830
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.7651334404945374,
      "learning_rate": 1e-05,
      "loss": 0.142,
      "step": 1952
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8922305764411027,
      "eval_f1": 0.8683279483657071,
      "eval_loss": 0.302460253238678,
      "eval_precision": 0.873366724738676,
      "eval_recall": 0.863747954173486,
      "eval_runtime": 1.8132,
      "eval_samples_per_second": 220.059,
      "eval_steps_per_second": 27.576,
      "step": 1952
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.19514738023281097,
      "learning_rate": 7.5e-06,
      "loss": 0.143,
      "step": 2074
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8897243107769424,
      "eval_f1": 0.8682773109243698,
      "eval_loss": 0.2950632870197296,
      "eval_precision": 0.864855223259409,
      "eval_recall": 0.8719767230405528,
      "eval_runtime": 1.8087,
      "eval_samples_per_second": 220.597,
      "eval_steps_per_second": 27.644,
      "step": 2074
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.15066123008728027,
      "learning_rate": 5e-06,
      "loss": 0.1315,
      "step": 2196
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8822055137844611,
      "eval_f1": 0.858259325044405,
      "eval_loss": 0.3012860417366028,
      "eval_precision": 0.8573798178418481,
      "eval_recall": 0.8591562102200401,
      "eval_runtime": 1.8058,
      "eval_samples_per_second": 220.949,
      "eval_steps_per_second": 27.688,
      "step": 2196
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.04642534255981445,
      "learning_rate": 2.5e-06,
      "loss": 0.1378,
      "step": 2318
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8872180451127819,
      "eval_f1": 0.8629148629148629,
      "eval_loss": 0.30377450585365295,
      "eval_precision": 0.8657894736842104,
      "eval_recall": 0.860201854882706,
      "eval_runtime": 1.8121,
      "eval_samples_per_second": 220.187,
      "eval_steps_per_second": 27.592,
      "step": 2318
    },
    {
      "epoch": 20.0,
      "grad_norm": 3.5789785385131836,
      "learning_rate": 0.0,
      "loss": 0.1333,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8847117794486216,
      "eval_f1": 0.8609292598654301,
      "eval_loss": 0.29736196994781494,
      "eval_precision": 0.8609292598654301,
      "eval_recall": 0.8609292598654301,
      "eval_runtime": 1.814,
      "eval_samples_per_second": 219.959,
      "eval_steps_per_second": 27.564,
      "step": 2440
    },
    {
      "epoch": 20.0,
      "step": 2440,
      "total_flos": 8444128359504000.0,
      "train_loss": 0.23053875125822473,
      "train_runtime": 628.042,
      "train_samples_per_second": 115.852,
      "train_steps_per_second": 3.885
    }
  ],
  "logging_steps": 500,
  "max_steps": 2440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 8444128359504000.0,
  "train_batch_size": 30,
  "trial_name": null,
  "trial_params": null
}