|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.387133002281189, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.713, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8990360727417271, |
|
"eval_f1": 0.42582781456953644, |
|
"eval_loss": 0.3558189868927002, |
|
"eval_precision": 0.49499615088529636, |
|
"eval_recall": 0.3736199883788495, |
|
"eval_runtime": 4.425, |
|
"eval_samples_per_second": 211.297, |
|
"eval_steps_per_second": 3.39, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8080163598060608, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2793, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9392328331511478, |
|
"eval_f1": 0.7174307174307173, |
|
"eval_loss": 0.19311487674713135, |
|
"eval_precision": 0.647196261682243, |
|
"eval_recall": 0.8047646717024985, |
|
"eval_runtime": 4.5332, |
|
"eval_samples_per_second": 206.254, |
|
"eval_steps_per_second": 3.309, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.451019287109375, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1876, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9461890092417768, |
|
"eval_f1": 0.7516120711890638, |
|
"eval_loss": 0.161931574344635, |
|
"eval_precision": 0.6757884972170687, |
|
"eval_recall": 0.8466008134805346, |
|
"eval_runtime": 4.5422, |
|
"eval_samples_per_second": 205.846, |
|
"eval_steps_per_second": 3.302, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.173278570175171, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1593, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.7947551511907948, |
|
"eval_loss": 0.14164301753044128, |
|
"eval_precision": 0.7366071428571429, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.559, |
|
"eval_samples_per_second": 205.089, |
|
"eval_steps_per_second": 3.29, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1243175268173218, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1412, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9559276557686575, |
|
"eval_f1": 0.7968959058067969, |
|
"eval_loss": 0.13503268361091614, |
|
"eval_precision": 0.7385912698412699, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5319, |
|
"eval_samples_per_second": 206.317, |
|
"eval_steps_per_second": 3.31, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.4806779623031616, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1325, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.795219123505976, |
|
"eval_loss": 0.13609066605567932, |
|
"eval_precision": 0.7323874755381604, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.4797, |
|
"eval_samples_per_second": 208.719, |
|
"eval_steps_per_second": 3.348, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.750013828277588, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.126, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9553314121037464, |
|
"eval_f1": 0.7943751658264792, |
|
"eval_loss": 0.13828907907009125, |
|
"eval_precision": 0.73095703125, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.5668, |
|
"eval_samples_per_second": 204.737, |
|
"eval_steps_per_second": 3.285, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.7366927862167358, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1194, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9583126304283017, |
|
"eval_f1": 0.8088274395107684, |
|
"eval_loss": 0.13490265607833862, |
|
"eval_precision": 0.7455882352941177, |
|
"eval_recall": 0.8837884950610111, |
|
"eval_runtime": 4.5193, |
|
"eval_samples_per_second": 206.893, |
|
"eval_steps_per_second": 3.319, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.5051740407943726, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1137, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9583126304283017, |
|
"eval_f1": 0.8071869133816038, |
|
"eval_loss": 0.12994244694709778, |
|
"eval_precision": 0.7495019920318725, |
|
"eval_recall": 0.8744915746658919, |
|
"eval_runtime": 4.4853, |
|
"eval_samples_per_second": 208.458, |
|
"eval_steps_per_second": 3.344, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.5836716890335083, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1112, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9579151346516943, |
|
"eval_f1": 0.8028962188254224, |
|
"eval_loss": 0.1285104751586914, |
|
"eval_precision": 0.7455179282868526, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.5577, |
|
"eval_samples_per_second": 205.146, |
|
"eval_steps_per_second": 3.291, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.5579893589019775, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1065, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.958710126204909, |
|
"eval_f1": 0.8118811881188118, |
|
"eval_loss": 0.13038235902786255, |
|
"eval_precision": 0.7524801587301587, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.5088, |
|
"eval_samples_per_second": 207.371, |
|
"eval_steps_per_second": 3.327, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.483597993850708, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1044, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9577163867633907, |
|
"eval_f1": 0.8106080900080364, |
|
"eval_loss": 0.1328919380903244, |
|
"eval_precision": 0.7519880715705766, |
|
"eval_recall": 0.8791400348634515, |
|
"eval_runtime": 4.5202, |
|
"eval_samples_per_second": 206.851, |
|
"eval_steps_per_second": 3.318, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.8680760860443115, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1026, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9585113783166054, |
|
"eval_f1": 0.8076405703524348, |
|
"eval_loss": 0.1257127821445465, |
|
"eval_precision": 0.7520040080160321, |
|
"eval_recall": 0.8721673445671121, |
|
"eval_runtime": 4.5357, |
|
"eval_samples_per_second": 206.142, |
|
"eval_steps_per_second": 3.307, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.6697184443473816, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0989, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9599026135347312, |
|
"eval_f1": 0.8167341430499325, |
|
"eval_loss": 0.12645690143108368, |
|
"eval_precision": 0.7626008064516129, |
|
"eval_recall": 0.8791400348634515, |
|
"eval_runtime": 4.5293, |
|
"eval_samples_per_second": 206.433, |
|
"eval_steps_per_second": 3.312, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.7948130369186401, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0982, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9597038656464275, |
|
"eval_f1": 0.8180102453491508, |
|
"eval_loss": 0.1280616968870163, |
|
"eval_precision": 0.7630784708249497, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.5425, |
|
"eval_samples_per_second": 205.834, |
|
"eval_steps_per_second": 3.302, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.107656717300415, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0974, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9597038656464275, |
|
"eval_f1": 0.8093322606596943, |
|
"eval_loss": 0.12642435729503632, |
|
"eval_precision": 0.7514940239043825, |
|
"eval_recall": 0.8768158047646717, |
|
"eval_runtime": 4.5035, |
|
"eval_samples_per_second": 207.616, |
|
"eval_steps_per_second": 3.331, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.980239748954773, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0966, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9589088740932128, |
|
"eval_f1": 0.814021942734814, |
|
"eval_loss": 0.12821656465530396, |
|
"eval_precision": 0.7544642857142857, |
|
"eval_recall": 0.8837884950610111, |
|
"eval_runtime": 4.5624, |
|
"eval_samples_per_second": 204.935, |
|
"eval_steps_per_second": 3.288, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.8313038349151611, |
|
"learning_rate": 5e-06, |
|
"loss": 0.095, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9589088740932128, |
|
"eval_f1": 0.8144966442953021, |
|
"eval_loss": 0.12922193109989166, |
|
"eval_precision": 0.7569860279441117, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.4773, |
|
"eval_samples_per_second": 208.833, |
|
"eval_steps_per_second": 3.35, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.775888204574585, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0941, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9595051177581239, |
|
"eval_f1": 0.8153722117710293, |
|
"eval_loss": 0.12684974074363708, |
|
"eval_precision": 0.7585, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.5049, |
|
"eval_samples_per_second": 207.55, |
|
"eval_steps_per_second": 3.33, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.084799289703369, |
|
"learning_rate": 0.0, |
|
"loss": 0.0948, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9593063698698201, |
|
"eval_f1": 0.8162496637072908, |
|
"eval_loss": 0.12777453660964966, |
|
"eval_precision": 0.7600200400801603, |
|
"eval_recall": 0.8814642649622313, |
|
"eval_runtime": 4.5062, |
|
"eval_samples_per_second": 207.492, |
|
"eval_steps_per_second": 3.329, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 4552961808488766.0, |
|
"train_loss": 0.15358480504064848, |
|
"train_runtime": 1231.4229, |
|
"train_samples_per_second": 137.028, |
|
"train_steps_per_second": 8.575 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4552961808488766.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|