|
{ |
|
"best_metric": 0.2574561536312103, |
|
"best_model_checkpoint": "prostate-mri-T2w-v03/checkpoint-528", |
|
"epoch": 33.0, |
|
"eval_steps": 500, |
|
"global_step": 528, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 110.21771240234375, |
|
"learning_rate": 6.25e-07, |
|
"loss": 0.6921, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 39.45058822631836, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.6934, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 25.318283081054688, |
|
"learning_rate": 1.875e-06, |
|
"loss": 0.6819, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 60.688926696777344, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.6904, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 61.651947021484375, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.6924, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 114.38175201416016, |
|
"learning_rate": 3.75e-06, |
|
"loss": 0.6997, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 129.98809814453125, |
|
"learning_rate": 4.375e-06, |
|
"loss": 0.6907, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 27.817869186401367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6943, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 30.465864181518555, |
|
"learning_rate": 5.625e-06, |
|
"loss": 0.7056, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 85.87218475341797, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.6987, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 126.83247375488281, |
|
"learning_rate": 6.875000000000001e-06, |
|
"loss": 0.7019, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 55.31666564941406, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.6875, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 76.3473129272461, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.6877, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 61.97464370727539, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.6907, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 72.70734405517578, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.6963, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 187.76124572753906, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6849, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4838709677419355, |
|
"eval_auc": 0.39548319327731096, |
|
"eval_f1": 0.6444444444444445, |
|
"eval_loss": 0.6953126192092896, |
|
"eval_precision": 0.5178571428571429, |
|
"eval_recall": 0.8529411764705882, |
|
"eval_runtime": 1.8981, |
|
"eval_samples_per_second": 32.665, |
|
"eval_steps_per_second": 1.054, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 64.36949920654297, |
|
"learning_rate": 1.0625e-05, |
|
"loss": 0.6953, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 100.7771987915039, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.6824, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 85.3595962524414, |
|
"learning_rate": 1.1875e-05, |
|
"loss": 0.6865, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 63.85357666015625, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.6899, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 41.52039337158203, |
|
"learning_rate": 1.3125e-05, |
|
"loss": 0.697, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 46.41688537597656, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.6851, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.4375, |
|
"grad_norm": 48.81169128417969, |
|
"learning_rate": 1.4374999999999999e-05, |
|
"loss": 0.6919, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 44.77475357055664, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.6873, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 152.93743896484375, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.6997, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 56.11520767211914, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.6973, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 224.677734375, |
|
"learning_rate": 1.6875000000000004e-05, |
|
"loss": 0.6912, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 48.88201904296875, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.6946, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.8125, |
|
"grad_norm": 69.78736877441406, |
|
"learning_rate": 1.8125e-05, |
|
"loss": 0.6838, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 94.947021484375, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.7002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.9375, |
|
"grad_norm": 84.53536987304688, |
|
"learning_rate": 1.9375e-05, |
|
"loss": 0.6865, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 158.0510711669922, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7018, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_auc": 0.5530462184873949, |
|
"eval_f1": 0.6585365853658537, |
|
"eval_loss": 0.6887602806091309, |
|
"eval_precision": 0.5625, |
|
"eval_recall": 0.7941176470588235, |
|
"eval_runtime": 1.8847, |
|
"eval_samples_per_second": 32.897, |
|
"eval_steps_per_second": 1.061, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.0625, |
|
"grad_norm": 55.63325881958008, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 0.6812, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.125, |
|
"grad_norm": 76.34017944335938, |
|
"learning_rate": 2.125e-05, |
|
"loss": 0.6929, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.1875, |
|
"grad_norm": 161.55897521972656, |
|
"learning_rate": 2.1875e-05, |
|
"loss": 0.6743, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 49.372947692871094, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.687, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.3125, |
|
"grad_norm": 59.057132720947266, |
|
"learning_rate": 2.3125000000000003e-05, |
|
"loss": 0.689, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.375, |
|
"grad_norm": 49.84357452392578, |
|
"learning_rate": 2.375e-05, |
|
"loss": 0.6677, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.4375, |
|
"grad_norm": 321.417724609375, |
|
"learning_rate": 2.4375e-05, |
|
"loss": 0.6729, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 117.68087768554688, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6787, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.5625, |
|
"grad_norm": 32.05921936035156, |
|
"learning_rate": 2.5625e-05, |
|
"loss": 0.7009, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.625, |
|
"grad_norm": 77.77110290527344, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.6897, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.6875, |
|
"grad_norm": 80.83302307128906, |
|
"learning_rate": 2.6875e-05, |
|
"loss": 0.7002, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 43.50077438354492, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.6938, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.8125, |
|
"grad_norm": 98.61802673339844, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 0.6787, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.875, |
|
"grad_norm": 42.48042678833008, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 0.6807, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.9375, |
|
"grad_norm": 34.907432556152344, |
|
"learning_rate": 2.9375000000000003e-05, |
|
"loss": 0.7095, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 55.62687683105469, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7181, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_auc": 0.6307773109243697, |
|
"eval_f1": 0.7083333333333334, |
|
"eval_loss": 0.6826488375663757, |
|
"eval_precision": 0.5483870967741935, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 1.9113, |
|
"eval_samples_per_second": 32.439, |
|
"eval_steps_per_second": 1.046, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0625, |
|
"grad_norm": 33.758544921875, |
|
"learning_rate": 3.0625000000000006e-05, |
|
"loss": 0.6763, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 49.062286376953125, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.6807, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.1875, |
|
"grad_norm": 56.35905838012695, |
|
"learning_rate": 3.1875e-05, |
|
"loss": 0.7012, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 46.76564025878906, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.686, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.3125, |
|
"grad_norm": 33.05558395385742, |
|
"learning_rate": 3.3125e-05, |
|
"loss": 0.6731, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 3.375, |
|
"grad_norm": 35.67483901977539, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 0.6968, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.4375, |
|
"grad_norm": 42.819549560546875, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 0.6938, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 36.487857818603516, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.6777, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.5625, |
|
"grad_norm": 27.21498680114746, |
|
"learning_rate": 3.5625000000000005e-05, |
|
"loss": 0.6929, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.625, |
|
"grad_norm": 33.59840774536133, |
|
"learning_rate": 3.625e-05, |
|
"loss": 0.6897, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.6875, |
|
"grad_norm": 44.81986618041992, |
|
"learning_rate": 3.6875e-05, |
|
"loss": 0.6973, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 34.137603759765625, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.7063, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.8125, |
|
"grad_norm": 28.17397117614746, |
|
"learning_rate": 3.8125e-05, |
|
"loss": 0.6851, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 3.875, |
|
"grad_norm": 31.33745002746582, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.6912, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 3.9375, |
|
"grad_norm": 29.169218063354492, |
|
"learning_rate": 3.9375e-05, |
|
"loss": 0.6726, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 21.54955291748047, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6465, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.532258064516129, |
|
"eval_auc": 0.4889705882352941, |
|
"eval_f1": 0.6947368421052632, |
|
"eval_loss": 0.6890751123428345, |
|
"eval_precision": 0.5409836065573771, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 1.9254, |
|
"eval_samples_per_second": 32.201, |
|
"eval_steps_per_second": 1.039, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.0625, |
|
"grad_norm": 27.51595115661621, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 0.6736, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.125, |
|
"grad_norm": 30.91176414489746, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.7058, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.1875, |
|
"grad_norm": 30.3780574798584, |
|
"learning_rate": 4.1875e-05, |
|
"loss": 0.6824, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 24.23114013671875, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.6511, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.3125, |
|
"grad_norm": 29.467042922973633, |
|
"learning_rate": 4.3125000000000005e-05, |
|
"loss": 0.668, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.375, |
|
"grad_norm": 33.71297073364258, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.6836, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.4375, |
|
"grad_norm": 37.32026290893555, |
|
"learning_rate": 4.4375e-05, |
|
"loss": 0.6907, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 35.53947067260742, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6746, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.5625, |
|
"grad_norm": 19.044368743896484, |
|
"learning_rate": 4.5625e-05, |
|
"loss": 0.6924, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 4.625, |
|
"grad_norm": 28.231525421142578, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 0.6499, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"grad_norm": 34.95646667480469, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.6882, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 27.18376350402832, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.6821, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 4.8125, |
|
"grad_norm": 36.19109344482422, |
|
"learning_rate": 4.8125000000000004e-05, |
|
"loss": 0.7251, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.875, |
|
"grad_norm": 43.81327819824219, |
|
"learning_rate": 4.875e-05, |
|
"loss": 0.6829, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 4.9375, |
|
"grad_norm": 49.45500564575195, |
|
"learning_rate": 4.937500000000001e-05, |
|
"loss": 0.6931, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 63.39349365234375, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6484, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_auc": 0.4149159663865547, |
|
"eval_f1": 0.7083333333333334, |
|
"eval_loss": 0.697076678276062, |
|
"eval_precision": 0.5483870967741935, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 1.9324, |
|
"eval_samples_per_second": 32.084, |
|
"eval_steps_per_second": 1.035, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0625, |
|
"grad_norm": 30.904890060424805, |
|
"learning_rate": 4.993055555555556e-05, |
|
"loss": 0.6575, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 5.125, |
|
"grad_norm": 28.443708419799805, |
|
"learning_rate": 4.986111111111111e-05, |
|
"loss": 0.6843, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 5.1875, |
|
"grad_norm": 31.65029525756836, |
|
"learning_rate": 4.979166666666667e-05, |
|
"loss": 0.6775, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"grad_norm": 41.979393005371094, |
|
"learning_rate": 4.972222222222223e-05, |
|
"loss": 0.6704, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.3125, |
|
"grad_norm": 57.692604064941406, |
|
"learning_rate": 4.965277777777778e-05, |
|
"loss": 0.6951, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 5.375, |
|
"grad_norm": 29.71929359436035, |
|
"learning_rate": 4.958333333333334e-05, |
|
"loss": 0.7029, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.4375, |
|
"grad_norm": 21.230836868286133, |
|
"learning_rate": 4.951388888888889e-05, |
|
"loss": 0.6818, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 28.481653213500977, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 0.6799, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 5.5625, |
|
"grad_norm": 28.052490234375, |
|
"learning_rate": 4.937500000000001e-05, |
|
"loss": 0.6133, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 5.625, |
|
"grad_norm": 32.066349029541016, |
|
"learning_rate": 4.930555555555556e-05, |
|
"loss": 0.6906, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 5.6875, |
|
"grad_norm": 35.96651840209961, |
|
"learning_rate": 4.923611111111112e-05, |
|
"loss": 0.674, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 47.34099197387695, |
|
"learning_rate": 4.9166666666666665e-05, |
|
"loss": 0.646, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.8125, |
|
"grad_norm": 25.086402893066406, |
|
"learning_rate": 4.909722222222222e-05, |
|
"loss": 0.6914, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 5.875, |
|
"grad_norm": 41.258033752441406, |
|
"learning_rate": 4.902777777777778e-05, |
|
"loss": 0.7019, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 5.9375, |
|
"grad_norm": 41.83433532714844, |
|
"learning_rate": 4.8958333333333335e-05, |
|
"loss": 0.6639, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 58.20046615600586, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.6855, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_auc": 0.5115546218487395, |
|
"eval_f1": 0.7083333333333334, |
|
"eval_loss": 0.6881301999092102, |
|
"eval_precision": 0.5483870967741935, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 1.9596, |
|
"eval_samples_per_second": 31.639, |
|
"eval_steps_per_second": 1.021, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.0625, |
|
"grad_norm": 46.368438720703125, |
|
"learning_rate": 4.8819444444444444e-05, |
|
"loss": 0.674, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 6.125, |
|
"grad_norm": 24.048921585083008, |
|
"learning_rate": 4.875e-05, |
|
"loss": 0.616, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 6.1875, |
|
"grad_norm": 61.79984664916992, |
|
"learning_rate": 4.8680555555555554e-05, |
|
"loss": 0.6044, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 55.52322769165039, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.7114, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.3125, |
|
"grad_norm": 52.9608268737793, |
|
"learning_rate": 4.854166666666667e-05, |
|
"loss": 0.7129, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 6.375, |
|
"grad_norm": 36.51575469970703, |
|
"learning_rate": 4.8472222222222224e-05, |
|
"loss": 0.6807, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 6.4375, |
|
"grad_norm": 34.925045013427734, |
|
"learning_rate": 4.840277777777778e-05, |
|
"loss": 0.7192, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"grad_norm": 54.098388671875, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.64, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 6.5625, |
|
"grad_norm": 42.83259201049805, |
|
"learning_rate": 4.8263888888888895e-05, |
|
"loss": 0.6606, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 6.625, |
|
"grad_norm": 52.560943603515625, |
|
"learning_rate": 4.819444444444445e-05, |
|
"loss": 0.6775, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 6.6875, |
|
"grad_norm": 47.502281188964844, |
|
"learning_rate": 4.8125000000000004e-05, |
|
"loss": 0.6632, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 45.004520416259766, |
|
"learning_rate": 4.805555555555556e-05, |
|
"loss": 0.6558, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 6.8125, |
|
"grad_norm": 37.94780349731445, |
|
"learning_rate": 4.7986111111111113e-05, |
|
"loss": 0.6586, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 6.875, |
|
"grad_norm": 43.655487060546875, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 0.619, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 6.9375, |
|
"grad_norm": 38.0520133972168, |
|
"learning_rate": 4.784722222222223e-05, |
|
"loss": 0.7035, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 107.8686752319336, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.6764, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_auc": 0.7468487394957983, |
|
"eval_f1": 0.7021276595744681, |
|
"eval_loss": 0.6627079844474792, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 4.1619, |
|
"eval_samples_per_second": 14.897, |
|
"eval_steps_per_second": 0.481, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 7.0625, |
|
"grad_norm": 58.015350341796875, |
|
"learning_rate": 4.770833333333334e-05, |
|
"loss": 0.6333, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 7.125, |
|
"grad_norm": 38.808807373046875, |
|
"learning_rate": 4.7638888888888887e-05, |
|
"loss": 0.736, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 7.1875, |
|
"grad_norm": 44.99721145629883, |
|
"learning_rate": 4.756944444444444e-05, |
|
"loss": 0.6514, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"grad_norm": 49.331939697265625, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.6732, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 7.3125, |
|
"grad_norm": 42.08985900878906, |
|
"learning_rate": 4.743055555555556e-05, |
|
"loss": 0.686, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 7.375, |
|
"grad_norm": 35.44804382324219, |
|
"learning_rate": 4.736111111111111e-05, |
|
"loss": 0.6517, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 7.4375, |
|
"grad_norm": 68.73687744140625, |
|
"learning_rate": 4.7291666666666666e-05, |
|
"loss": 0.6578, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 37.671722412109375, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.6545, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.5625, |
|
"grad_norm": 30.469106674194336, |
|
"learning_rate": 4.7152777777777776e-05, |
|
"loss": 0.616, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 7.625, |
|
"grad_norm": 46.04682540893555, |
|
"learning_rate": 4.708333333333334e-05, |
|
"loss": 0.6775, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 7.6875, |
|
"grad_norm": 75.76403045654297, |
|
"learning_rate": 4.701388888888889e-05, |
|
"loss": 0.6191, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"grad_norm": 54.207244873046875, |
|
"learning_rate": 4.6944444444444446e-05, |
|
"loss": 0.6384, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 33.48508071899414, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.6265, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 7.875, |
|
"grad_norm": 35.78009033203125, |
|
"learning_rate": 4.6805555555555556e-05, |
|
"loss": 0.6279, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 7.9375, |
|
"grad_norm": 56.53678894042969, |
|
"learning_rate": 4.673611111111112e-05, |
|
"loss": 0.6654, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 68.72865295410156, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.6589, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5806451612903226, |
|
"eval_auc": 0.7064075630252102, |
|
"eval_f1": 0.717391304347826, |
|
"eval_loss": 0.6593685746192932, |
|
"eval_precision": 0.5689655172413793, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 2.9827, |
|
"eval_samples_per_second": 20.787, |
|
"eval_steps_per_second": 0.671, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 8.0625, |
|
"grad_norm": 69.71391296386719, |
|
"learning_rate": 4.6597222222222226e-05, |
|
"loss": 0.634, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 8.125, |
|
"grad_norm": 60.223880767822266, |
|
"learning_rate": 4.652777777777778e-05, |
|
"loss": 0.631, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.1875, |
|
"grad_norm": 36.413326263427734, |
|
"learning_rate": 4.6458333333333335e-05, |
|
"loss": 0.6221, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"grad_norm": 46.27412414550781, |
|
"learning_rate": 4.638888888888889e-05, |
|
"loss": 0.619, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 8.3125, |
|
"grad_norm": 39.7827262878418, |
|
"learning_rate": 4.631944444444445e-05, |
|
"loss": 0.719, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 8.375, |
|
"grad_norm": 87.59008026123047, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 0.5983, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 8.4375, |
|
"grad_norm": 62.683250427246094, |
|
"learning_rate": 4.618055555555556e-05, |
|
"loss": 0.6635, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"grad_norm": 29.072847366333008, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 0.6459, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 8.5625, |
|
"grad_norm": 54.492469787597656, |
|
"learning_rate": 4.604166666666666e-05, |
|
"loss": 0.701, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 8.625, |
|
"grad_norm": 38.17914962768555, |
|
"learning_rate": 4.5972222222222225e-05, |
|
"loss": 0.6271, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 8.6875, |
|
"grad_norm": 72.31561279296875, |
|
"learning_rate": 4.590277777777778e-05, |
|
"loss": 0.6277, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 69.48700714111328, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.6243, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.8125, |
|
"grad_norm": 45.070735931396484, |
|
"learning_rate": 4.576388888888889e-05, |
|
"loss": 0.6534, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 8.875, |
|
"grad_norm": 54.82920455932617, |
|
"learning_rate": 4.569444444444444e-05, |
|
"loss": 0.6283, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 8.9375, |
|
"grad_norm": 83.2965087890625, |
|
"learning_rate": 4.5625e-05, |
|
"loss": 0.6478, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 163.7074737548828, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.5977, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5967741935483871, |
|
"eval_auc": 0.7064075630252101, |
|
"eval_f1": 0.7191011235955056, |
|
"eval_loss": 0.6552733182907104, |
|
"eval_precision": 0.5818181818181818, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 1.9183, |
|
"eval_samples_per_second": 32.321, |
|
"eval_steps_per_second": 1.043, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 9.0625, |
|
"grad_norm": 79.20127868652344, |
|
"learning_rate": 4.5486111111111114e-05, |
|
"loss": 0.5693, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 9.125, |
|
"grad_norm": 39.74399185180664, |
|
"learning_rate": 4.541666666666667e-05, |
|
"loss": 0.6826, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 9.1875, |
|
"grad_norm": 48.851234436035156, |
|
"learning_rate": 4.534722222222222e-05, |
|
"loss": 0.681, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 32.17538833618164, |
|
"learning_rate": 4.527777777777778e-05, |
|
"loss": 0.593, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 9.3125, |
|
"grad_norm": 84.288330078125, |
|
"learning_rate": 4.520833333333334e-05, |
|
"loss": 0.6222, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 41.05829620361328, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.6035, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 9.4375, |
|
"grad_norm": 61.59003829956055, |
|
"learning_rate": 4.506944444444445e-05, |
|
"loss": 0.6007, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 78.13147735595703, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6533, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 9.5625, |
|
"grad_norm": 58.18182373046875, |
|
"learning_rate": 4.493055555555556e-05, |
|
"loss": 0.6033, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 9.625, |
|
"grad_norm": 43.28728485107422, |
|
"learning_rate": 4.486111111111111e-05, |
|
"loss": 0.6, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 9.6875, |
|
"grad_norm": 48.38688278198242, |
|
"learning_rate": 4.4791666666666673e-05, |
|
"loss": 0.6678, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"grad_norm": 60.13078689575195, |
|
"learning_rate": 4.472222222222223e-05, |
|
"loss": 0.6273, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 9.8125, |
|
"grad_norm": 42.91427230834961, |
|
"learning_rate": 4.465277777777778e-05, |
|
"loss": 0.6301, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 9.875, |
|
"grad_norm": 48.625492095947266, |
|
"learning_rate": 4.458333333333334e-05, |
|
"loss": 0.5844, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 9.9375, |
|
"grad_norm": 34.01121139526367, |
|
"learning_rate": 4.4513888888888885e-05, |
|
"loss": 0.7302, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 89.23443603515625, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.6165, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7258064516129032, |
|
"eval_auc": 0.8161764705882353, |
|
"eval_f1": 0.7951807228915663, |
|
"eval_loss": 0.609186053276062, |
|
"eval_precision": 0.673469387755102, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 1.9257, |
|
"eval_samples_per_second": 32.196, |
|
"eval_steps_per_second": 1.039, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.0625, |
|
"grad_norm": 33.16124725341797, |
|
"learning_rate": 4.4375e-05, |
|
"loss": 0.6516, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 10.125, |
|
"grad_norm": 44.67818832397461, |
|
"learning_rate": 4.4305555555555556e-05, |
|
"loss": 0.7222, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 10.1875, |
|
"grad_norm": 44.10664749145508, |
|
"learning_rate": 4.423611111111111e-05, |
|
"loss": 0.6145, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"grad_norm": 44.849090576171875, |
|
"learning_rate": 4.4166666666666665e-05, |
|
"loss": 0.6475, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 10.3125, |
|
"grad_norm": 42.657108306884766, |
|
"learning_rate": 4.4097222222222226e-05, |
|
"loss": 0.5923, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 10.375, |
|
"grad_norm": 60.3536262512207, |
|
"learning_rate": 4.402777777777778e-05, |
|
"loss": 0.7048, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 10.4375, |
|
"grad_norm": 89.41458892822266, |
|
"learning_rate": 4.3958333333333336e-05, |
|
"loss": 0.5677, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"grad_norm": 73.32341003417969, |
|
"learning_rate": 4.388888888888889e-05, |
|
"loss": 0.7057, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 10.5625, |
|
"grad_norm": 42.202144622802734, |
|
"learning_rate": 4.3819444444444445e-05, |
|
"loss": 0.6323, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 10.625, |
|
"grad_norm": 33.11185073852539, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.5757, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 10.6875, |
|
"grad_norm": 37.71356201171875, |
|
"learning_rate": 4.368055555555556e-05, |
|
"loss": 0.5549, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"grad_norm": 65.49434661865234, |
|
"learning_rate": 4.3611111111111116e-05, |
|
"loss": 0.5752, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 10.8125, |
|
"grad_norm": 52.854515075683594, |
|
"learning_rate": 4.354166666666667e-05, |
|
"loss": 0.6448, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 10.875, |
|
"grad_norm": 93.11141204833984, |
|
"learning_rate": 4.3472222222222225e-05, |
|
"loss": 0.7771, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"grad_norm": 48.91088104248047, |
|
"learning_rate": 4.340277777777778e-05, |
|
"loss": 0.5415, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 178.06539916992188, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.7217, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5967741935483871, |
|
"eval_auc": 0.6701680672268908, |
|
"eval_f1": 0.691358024691358, |
|
"eval_loss": 0.659967303276062, |
|
"eval_precision": 0.5957446808510638, |
|
"eval_recall": 0.8235294117647058, |
|
"eval_runtime": 1.9464, |
|
"eval_samples_per_second": 31.854, |
|
"eval_steps_per_second": 1.028, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 11.0625, |
|
"grad_norm": 67.89295959472656, |
|
"learning_rate": 4.3263888888888895e-05, |
|
"loss": 0.6306, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 11.125, |
|
"grad_norm": 36.25627517700195, |
|
"learning_rate": 4.319444444444445e-05, |
|
"loss": 0.4834, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 11.1875, |
|
"grad_norm": 43.655174255371094, |
|
"learning_rate": 4.3125000000000005e-05, |
|
"loss": 0.5637, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"grad_norm": 51.83556365966797, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.5269, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.3125, |
|
"grad_norm": 50.347084045410156, |
|
"learning_rate": 4.2986111111111114e-05, |
|
"loss": 0.5744, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 11.375, |
|
"grad_norm": 71.25247192382812, |
|
"learning_rate": 4.291666666666667e-05, |
|
"loss": 0.6053, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 11.4375, |
|
"grad_norm": 48.39303970336914, |
|
"learning_rate": 4.284722222222222e-05, |
|
"loss": 0.6816, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"grad_norm": 51.63835525512695, |
|
"learning_rate": 4.277777777777778e-05, |
|
"loss": 0.6187, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 11.5625, |
|
"grad_norm": 50.23905563354492, |
|
"learning_rate": 4.270833333333333e-05, |
|
"loss": 0.6252, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 11.625, |
|
"grad_norm": 81.17058563232422, |
|
"learning_rate": 4.263888888888889e-05, |
|
"loss": 0.6522, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 11.6875, |
|
"grad_norm": 74.09992218017578, |
|
"learning_rate": 4.256944444444445e-05, |
|
"loss": 0.5702, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"grad_norm": 32.29295349121094, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.5317, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 11.8125, |
|
"grad_norm": 64.11018371582031, |
|
"learning_rate": 4.243055555555556e-05, |
|
"loss": 0.5979, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 11.875, |
|
"grad_norm": 131.69041442871094, |
|
"learning_rate": 4.236111111111111e-05, |
|
"loss": 0.5775, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 11.9375, |
|
"grad_norm": 49.00139236450195, |
|
"learning_rate": 4.229166666666667e-05, |
|
"loss": 0.6661, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 154.61660766601562, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.6667, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6935483870967742, |
|
"eval_auc": 0.7804621848739496, |
|
"eval_f1": 0.759493670886076, |
|
"eval_loss": 0.5980972051620483, |
|
"eval_precision": 0.6666666666666666, |
|
"eval_recall": 0.8823529411764706, |
|
"eval_runtime": 1.9477, |
|
"eval_samples_per_second": 31.832, |
|
"eval_steps_per_second": 1.027, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 12.0625, |
|
"grad_norm": 80.12366485595703, |
|
"learning_rate": 4.215277777777778e-05, |
|
"loss": 0.5312, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 12.125, |
|
"grad_norm": 81.18545532226562, |
|
"learning_rate": 4.208333333333334e-05, |
|
"loss": 0.608, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 12.1875, |
|
"grad_norm": 63.287353515625, |
|
"learning_rate": 4.201388888888889e-05, |
|
"loss": 0.5273, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"grad_norm": 45.18925857543945, |
|
"learning_rate": 4.194444444444445e-05, |
|
"loss": 0.6504, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 12.3125, |
|
"grad_norm": 72.20451354980469, |
|
"learning_rate": 4.1875e-05, |
|
"loss": 0.5665, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 12.375, |
|
"grad_norm": 65.66788482666016, |
|
"learning_rate": 4.1805555555555556e-05, |
|
"loss": 0.527, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 12.4375, |
|
"grad_norm": 50.52779769897461, |
|
"learning_rate": 4.173611111111112e-05, |
|
"loss": 0.4926, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 73.23728942871094, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5988, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.5625, |
|
"grad_norm": 61.189208984375, |
|
"learning_rate": 4.159722222222223e-05, |
|
"loss": 0.562, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 12.625, |
|
"grad_norm": 62.635154724121094, |
|
"learning_rate": 4.152777777777778e-05, |
|
"loss": 0.6054, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 12.6875, |
|
"grad_norm": 50.976192474365234, |
|
"learning_rate": 4.1458333333333336e-05, |
|
"loss": 0.5327, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"grad_norm": 60.057830810546875, |
|
"learning_rate": 4.138888888888889e-05, |
|
"loss": 0.5172, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 12.8125, |
|
"grad_norm": 64.1837158203125, |
|
"learning_rate": 4.1319444444444445e-05, |
|
"loss": 0.5976, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 12.875, |
|
"grad_norm": 46.790653228759766, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.576, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 12.9375, |
|
"grad_norm": 227.32740783691406, |
|
"learning_rate": 4.1180555555555554e-05, |
|
"loss": 0.6454, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 102.79993438720703, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.5299, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7419354838709677, |
|
"eval_auc": 0.8508403361344538, |
|
"eval_f1": 0.7948717948717948, |
|
"eval_loss": 0.5460716485977173, |
|
"eval_precision": 0.7045454545454546, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.9335, |
|
"eval_samples_per_second": 32.066, |
|
"eval_steps_per_second": 1.034, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 13.0625, |
|
"grad_norm": 48.264347076416016, |
|
"learning_rate": 4.104166666666667e-05, |
|
"loss": 0.5178, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 13.125, |
|
"grad_norm": 110.42784118652344, |
|
"learning_rate": 4.0972222222222225e-05, |
|
"loss": 0.608, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 13.1875, |
|
"grad_norm": 220.24856567382812, |
|
"learning_rate": 4.090277777777778e-05, |
|
"loss": 0.584, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"grad_norm": 114.8612060546875, |
|
"learning_rate": 4.0833333333333334e-05, |
|
"loss": 0.5646, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 13.3125, |
|
"grad_norm": 58.41973114013672, |
|
"learning_rate": 4.076388888888889e-05, |
|
"loss": 0.5574, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 13.375, |
|
"grad_norm": 100.16024780273438, |
|
"learning_rate": 4.0694444444444444e-05, |
|
"loss": 0.5134, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 13.4375, |
|
"grad_norm": 80.44810485839844, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 0.5714, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"grad_norm": 67.43321228027344, |
|
"learning_rate": 4.055555555555556e-05, |
|
"loss": 0.5523, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 13.5625, |
|
"grad_norm": 43.3171501159668, |
|
"learning_rate": 4.0486111111111114e-05, |
|
"loss": 0.4604, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 13.625, |
|
"grad_norm": 40.656898498535156, |
|
"learning_rate": 4.041666666666667e-05, |
|
"loss": 0.5281, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 13.6875, |
|
"grad_norm": 84.43306732177734, |
|
"learning_rate": 4.0347222222222223e-05, |
|
"loss": 0.5104, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"grad_norm": 54.887840270996094, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.592, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 13.8125, |
|
"grad_norm": 53.76454162597656, |
|
"learning_rate": 4.020833333333334e-05, |
|
"loss": 0.4651, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 13.875, |
|
"grad_norm": 36.3071403503418, |
|
"learning_rate": 4.0138888888888894e-05, |
|
"loss": 0.492, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 13.9375, |
|
"grad_norm": 107.2224349975586, |
|
"learning_rate": 4.006944444444445e-05, |
|
"loss": 0.6049, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 131.88079833984375, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4969, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7580645161290323, |
|
"eval_auc": 0.8109243697478992, |
|
"eval_f1": 0.7540983606557377, |
|
"eval_loss": 0.5879219770431519, |
|
"eval_precision": 0.8518518518518519, |
|
"eval_recall": 0.6764705882352942, |
|
"eval_runtime": 1.9506, |
|
"eval_samples_per_second": 31.785, |
|
"eval_steps_per_second": 1.025, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 14.0625, |
|
"grad_norm": 58.407073974609375, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.5254, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 14.125, |
|
"grad_norm": 80.39613342285156, |
|
"learning_rate": 3.986111111111111e-05, |
|
"loss": 0.5312, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 14.1875, |
|
"grad_norm": 61.52587890625, |
|
"learning_rate": 3.979166666666667e-05, |
|
"loss": 0.5522, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"grad_norm": 99.368896484375, |
|
"learning_rate": 3.972222222222222e-05, |
|
"loss": 0.5006, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 14.3125, |
|
"grad_norm": 88.41030883789062, |
|
"learning_rate": 3.9652777777777776e-05, |
|
"loss": 0.5121, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 14.375, |
|
"grad_norm": 60.34226989746094, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 0.4733, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 14.4375, |
|
"grad_norm": 51.18446731567383, |
|
"learning_rate": 3.951388888888889e-05, |
|
"loss": 0.5245, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"grad_norm": 72.11532592773438, |
|
"learning_rate": 3.944444444444445e-05, |
|
"loss": 0.5194, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 14.5625, |
|
"grad_norm": 40.04228210449219, |
|
"learning_rate": 3.9375e-05, |
|
"loss": 0.705, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 14.625, |
|
"grad_norm": 37.71114730834961, |
|
"learning_rate": 3.9305555555555556e-05, |
|
"loss": 0.4384, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 14.6875, |
|
"grad_norm": 121.4798812866211, |
|
"learning_rate": 3.923611111111111e-05, |
|
"loss": 0.5156, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"grad_norm": 42.07125473022461, |
|
"learning_rate": 3.9166666666666665e-05, |
|
"loss": 0.55, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 14.8125, |
|
"grad_norm": 29.359413146972656, |
|
"learning_rate": 3.909722222222223e-05, |
|
"loss": 0.5123, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 14.875, |
|
"grad_norm": 44.849979400634766, |
|
"learning_rate": 3.902777777777778e-05, |
|
"loss": 0.525, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 14.9375, |
|
"grad_norm": 55.50422668457031, |
|
"learning_rate": 3.8958333333333336e-05, |
|
"loss": 0.5289, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 139.26576232910156, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.5433, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7903225806451613, |
|
"eval_auc": 0.8771008403361344, |
|
"eval_f1": 0.8, |
|
"eval_loss": 0.5240045189857483, |
|
"eval_precision": 0.8387096774193549, |
|
"eval_recall": 0.7647058823529411, |
|
"eval_runtime": 1.931, |
|
"eval_samples_per_second": 32.108, |
|
"eval_steps_per_second": 1.036, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 15.0625, |
|
"grad_norm": 78.3478775024414, |
|
"learning_rate": 3.8819444444444445e-05, |
|
"loss": 0.496, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 15.125, |
|
"grad_norm": 83.29708099365234, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.5401, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 15.1875, |
|
"grad_norm": 40.72947692871094, |
|
"learning_rate": 3.868055555555556e-05, |
|
"loss": 0.4114, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"grad_norm": 34.38518524169922, |
|
"learning_rate": 3.8611111111111116e-05, |
|
"loss": 0.4647, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 15.3125, |
|
"grad_norm": 45.165794372558594, |
|
"learning_rate": 3.854166666666667e-05, |
|
"loss": 0.5248, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 15.375, |
|
"grad_norm": 63.5728759765625, |
|
"learning_rate": 3.8472222222222225e-05, |
|
"loss": 0.5442, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 15.4375, |
|
"grad_norm": 58.13555908203125, |
|
"learning_rate": 3.840277777777778e-05, |
|
"loss": 0.6143, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"grad_norm": 36.5289421081543, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.4758, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 15.5625, |
|
"grad_norm": 54.04985046386719, |
|
"learning_rate": 3.826388888888889e-05, |
|
"loss": 0.4218, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 72.28607940673828, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.5893, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 15.6875, |
|
"grad_norm": 74.44154357910156, |
|
"learning_rate": 3.8125e-05, |
|
"loss": 0.6552, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"grad_norm": 44.40108871459961, |
|
"learning_rate": 3.805555555555555e-05, |
|
"loss": 0.4867, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 15.8125, |
|
"grad_norm": 73.97761535644531, |
|
"learning_rate": 3.7986111111111114e-05, |
|
"loss": 0.6184, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 15.875, |
|
"grad_norm": 141.63430786132812, |
|
"learning_rate": 3.791666666666667e-05, |
|
"loss": 0.4479, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 15.9375, |
|
"grad_norm": 71.14151763916016, |
|
"learning_rate": 3.7847222222222224e-05, |
|
"loss": 0.3983, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 72.08846282958984, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.3454, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7419354838709677, |
|
"eval_auc": 0.8261554621848739, |
|
"eval_f1": 0.7714285714285715, |
|
"eval_loss": 0.5412834286689758, |
|
"eval_precision": 0.75, |
|
"eval_recall": 0.7941176470588235, |
|
"eval_runtime": 1.9248, |
|
"eval_samples_per_second": 32.212, |
|
"eval_steps_per_second": 1.039, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 16.0625, |
|
"grad_norm": 51.053855895996094, |
|
"learning_rate": 3.770833333333333e-05, |
|
"loss": 0.4064, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 16.125, |
|
"grad_norm": 61.72437286376953, |
|
"learning_rate": 3.763888888888889e-05, |
|
"loss": 0.5844, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 16.1875, |
|
"grad_norm": 68.33097076416016, |
|
"learning_rate": 3.756944444444445e-05, |
|
"loss": 0.5247, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"grad_norm": 72.65277862548828, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.492, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 16.3125, |
|
"grad_norm": 48.784759521484375, |
|
"learning_rate": 3.743055555555556e-05, |
|
"loss": 0.5779, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 16.375, |
|
"grad_norm": 94.48021697998047, |
|
"learning_rate": 3.736111111111111e-05, |
|
"loss": 0.6233, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 16.4375, |
|
"grad_norm": 123.79901885986328, |
|
"learning_rate": 3.729166666666667e-05, |
|
"loss": 0.5248, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"grad_norm": 42.541114807128906, |
|
"learning_rate": 3.722222222222222e-05, |
|
"loss": 0.4769, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 16.5625, |
|
"grad_norm": 40.07915115356445, |
|
"learning_rate": 3.715277777777778e-05, |
|
"loss": 0.411, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 16.625, |
|
"grad_norm": 66.4336929321289, |
|
"learning_rate": 3.708333333333334e-05, |
|
"loss": 0.4531, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 16.6875, |
|
"grad_norm": 36.28628158569336, |
|
"learning_rate": 3.701388888888889e-05, |
|
"loss": 0.4582, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"grad_norm": 43.74733352661133, |
|
"learning_rate": 3.694444444444445e-05, |
|
"loss": 0.3818, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 16.8125, |
|
"grad_norm": 61.93031692504883, |
|
"learning_rate": 3.6875e-05, |
|
"loss": 0.4218, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 16.875, |
|
"grad_norm": 47.1074333190918, |
|
"learning_rate": 3.6805555555555556e-05, |
|
"loss": 0.4234, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 16.9375, |
|
"grad_norm": 91.64878845214844, |
|
"learning_rate": 3.673611111111112e-05, |
|
"loss": 0.5807, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 85.23784637451172, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.6552, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7580645161290323, |
|
"eval_auc": 0.8692226890756303, |
|
"eval_f1": 0.7945205479452054, |
|
"eval_loss": 0.4790668785572052, |
|
"eval_precision": 0.7435897435897436, |
|
"eval_recall": 0.8529411764705882, |
|
"eval_runtime": 1.9244, |
|
"eval_samples_per_second": 32.217, |
|
"eval_steps_per_second": 1.039, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 17.0625, |
|
"grad_norm": 73.90734100341797, |
|
"learning_rate": 3.659722222222222e-05, |
|
"loss": 0.79, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 17.125, |
|
"grad_norm": 94.85003662109375, |
|
"learning_rate": 3.6527777777777775e-05, |
|
"loss": 0.6025, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 17.1875, |
|
"grad_norm": 75.04529571533203, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"loss": 0.4477, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"grad_norm": 37.73780059814453, |
|
"learning_rate": 3.638888888888889e-05, |
|
"loss": 0.5583, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 17.3125, |
|
"grad_norm": 97.058349609375, |
|
"learning_rate": 3.6319444444444446e-05, |
|
"loss": 0.4121, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 17.375, |
|
"grad_norm": 41.515113830566406, |
|
"learning_rate": 3.625e-05, |
|
"loss": 0.4931, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 17.4375, |
|
"grad_norm": 89.66553497314453, |
|
"learning_rate": 3.6180555555555555e-05, |
|
"loss": 0.5748, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 45.831058502197266, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.5429, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 17.5625, |
|
"grad_norm": 37.5870246887207, |
|
"learning_rate": 3.604166666666667e-05, |
|
"loss": 0.4326, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 17.625, |
|
"grad_norm": 69.45463562011719, |
|
"learning_rate": 3.5972222222222225e-05, |
|
"loss": 0.4083, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 17.6875, |
|
"grad_norm": 89.9836654663086, |
|
"learning_rate": 3.590277777777778e-05, |
|
"loss": 0.6454, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"grad_norm": 157.84072875976562, |
|
"learning_rate": 3.5833333333333335e-05, |
|
"loss": 0.4036, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 17.8125, |
|
"grad_norm": 144.50209045410156, |
|
"learning_rate": 3.576388888888889e-05, |
|
"loss": 0.5848, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 17.875, |
|
"grad_norm": 89.03966522216797, |
|
"learning_rate": 3.5694444444444444e-05, |
|
"loss": 0.3845, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 17.9375, |
|
"grad_norm": 43.99159622192383, |
|
"learning_rate": 3.5625000000000005e-05, |
|
"loss": 0.5129, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 47.54975891113281, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.4147, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_auc": 0.8939075630252101, |
|
"eval_f1": 0.8181818181818182, |
|
"eval_loss": 0.43667498230934143, |
|
"eval_precision": 0.84375, |
|
"eval_recall": 0.7941176470588235, |
|
"eval_runtime": 1.9961, |
|
"eval_samples_per_second": 31.06, |
|
"eval_steps_per_second": 1.002, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 18.0625, |
|
"grad_norm": 103.01007080078125, |
|
"learning_rate": 3.5486111111111115e-05, |
|
"loss": 0.5238, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 18.125, |
|
"grad_norm": 108.39590454101562, |
|
"learning_rate": 3.541666666666667e-05, |
|
"loss": 0.5119, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 18.1875, |
|
"grad_norm": 36.57012176513672, |
|
"learning_rate": 3.5347222222222224e-05, |
|
"loss": 0.375, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"grad_norm": 64.35926818847656, |
|
"learning_rate": 3.527777777777778e-05, |
|
"loss": 0.7708, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 18.3125, |
|
"grad_norm": 57.50725555419922, |
|
"learning_rate": 3.520833333333334e-05, |
|
"loss": 0.4307, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 18.375, |
|
"grad_norm": 94.58295440673828, |
|
"learning_rate": 3.513888888888889e-05, |
|
"loss": 0.5037, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 18.4375, |
|
"grad_norm": 66.14063262939453, |
|
"learning_rate": 3.506944444444444e-05, |
|
"loss": 0.3167, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 18.5, |
|
"grad_norm": 101.09774017333984, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.504, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 18.5625, |
|
"grad_norm": 60.17547607421875, |
|
"learning_rate": 3.493055555555556e-05, |
|
"loss": 0.4903, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 18.625, |
|
"grad_norm": 83.2125015258789, |
|
"learning_rate": 3.486111111111111e-05, |
|
"loss": 0.605, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 18.6875, |
|
"grad_norm": 65.9618911743164, |
|
"learning_rate": 3.479166666666667e-05, |
|
"loss": 0.4312, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 29.95576286315918, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.3029, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.8125, |
|
"grad_norm": 100.61408996582031, |
|
"learning_rate": 3.465277777777778e-05, |
|
"loss": 0.4785, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 18.875, |
|
"grad_norm": 73.51964569091797, |
|
"learning_rate": 3.458333333333333e-05, |
|
"loss": 0.3882, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 18.9375, |
|
"grad_norm": 43.713768005371094, |
|
"learning_rate": 3.451388888888889e-05, |
|
"loss": 0.6225, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 246.18128967285156, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.5218, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7580645161290323, |
|
"eval_auc": 0.8660714285714286, |
|
"eval_f1": 0.7457627118644068, |
|
"eval_loss": 0.4833846688270569, |
|
"eval_precision": 0.88, |
|
"eval_recall": 0.6470588235294118, |
|
"eval_runtime": 1.9511, |
|
"eval_samples_per_second": 31.776, |
|
"eval_steps_per_second": 1.025, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 19.0625, |
|
"grad_norm": 106.1519546508789, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 0.5315, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 19.125, |
|
"grad_norm": 65.67142486572266, |
|
"learning_rate": 3.430555555555556e-05, |
|
"loss": 0.6993, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 19.1875, |
|
"grad_norm": 36.1142692565918, |
|
"learning_rate": 3.423611111111111e-05, |
|
"loss": 0.3465, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"grad_norm": 46.80392837524414, |
|
"learning_rate": 3.4166666666666666e-05, |
|
"loss": 0.3667, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 19.3125, |
|
"grad_norm": 55.320953369140625, |
|
"learning_rate": 3.409722222222223e-05, |
|
"loss": 0.4295, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 19.375, |
|
"grad_norm": 40.02817153930664, |
|
"learning_rate": 3.402777777777778e-05, |
|
"loss": 0.3958, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 19.4375, |
|
"grad_norm": 84.88871002197266, |
|
"learning_rate": 3.3958333333333337e-05, |
|
"loss": 0.4724, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"grad_norm": 63.09480667114258, |
|
"learning_rate": 3.388888888888889e-05, |
|
"loss": 0.4288, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 19.5625, |
|
"grad_norm": 61.246192932128906, |
|
"learning_rate": 3.3819444444444446e-05, |
|
"loss": 0.3882, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 19.625, |
|
"grad_norm": 56.286766052246094, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 0.506, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 19.6875, |
|
"grad_norm": 65.18053436279297, |
|
"learning_rate": 3.368055555555556e-05, |
|
"loss": 0.4222, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"grad_norm": 71.56250762939453, |
|
"learning_rate": 3.3611111111111116e-05, |
|
"loss": 0.4521, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 19.8125, |
|
"grad_norm": 79.5528564453125, |
|
"learning_rate": 3.3541666666666664e-05, |
|
"loss": 0.5815, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 19.875, |
|
"grad_norm": 80.54459381103516, |
|
"learning_rate": 3.347222222222222e-05, |
|
"loss": 0.5034, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 19.9375, |
|
"grad_norm": 46.42332458496094, |
|
"learning_rate": 3.340277777777778e-05, |
|
"loss": 0.3943, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 265.7347412109375, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.8294, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7419354838709677, |
|
"eval_auc": 0.8277310924369747, |
|
"eval_f1": 0.75, |
|
"eval_loss": 0.5033332705497742, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.7058823529411765, |
|
"eval_runtime": 1.9359, |
|
"eval_samples_per_second": 32.026, |
|
"eval_steps_per_second": 1.033, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 20.0625, |
|
"grad_norm": 80.59957122802734, |
|
"learning_rate": 3.326388888888889e-05, |
|
"loss": 0.55, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 20.125, |
|
"grad_norm": 59.34410858154297, |
|
"learning_rate": 3.3194444444444444e-05, |
|
"loss": 0.4545, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 20.1875, |
|
"grad_norm": 190.26165771484375, |
|
"learning_rate": 3.3125e-05, |
|
"loss": 0.5782, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"grad_norm": 73.1745834350586, |
|
"learning_rate": 3.3055555555555553e-05, |
|
"loss": 0.42, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 20.3125, |
|
"grad_norm": 41.371578216552734, |
|
"learning_rate": 3.2986111111111115e-05, |
|
"loss": 0.2567, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 20.375, |
|
"grad_norm": 59.37371063232422, |
|
"learning_rate": 3.291666666666667e-05, |
|
"loss": 0.4721, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 20.4375, |
|
"grad_norm": 64.93104553222656, |
|
"learning_rate": 3.2847222222222224e-05, |
|
"loss": 0.4373, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"grad_norm": 86.97407531738281, |
|
"learning_rate": 3.277777777777778e-05, |
|
"loss": 0.3585, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 20.5625, |
|
"grad_norm": 65.34368896484375, |
|
"learning_rate": 3.270833333333333e-05, |
|
"loss": 0.3815, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 20.625, |
|
"grad_norm": 62.39166259765625, |
|
"learning_rate": 3.263888888888889e-05, |
|
"loss": 0.3582, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 20.6875, |
|
"grad_norm": 71.4781265258789, |
|
"learning_rate": 3.256944444444445e-05, |
|
"loss": 0.3781, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"grad_norm": 34.324867248535156, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3129, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 20.8125, |
|
"grad_norm": 86.5385971069336, |
|
"learning_rate": 3.243055555555556e-05, |
|
"loss": 0.3903, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 20.875, |
|
"grad_norm": 97.72544860839844, |
|
"learning_rate": 3.236111111111111e-05, |
|
"loss": 0.3329, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 20.9375, |
|
"grad_norm": 124.9569091796875, |
|
"learning_rate": 3.229166666666667e-05, |
|
"loss": 0.5095, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 197.2810516357422, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.7458, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_auc": 0.9028361344537815, |
|
"eval_f1": 0.8378378378378378, |
|
"eval_loss": 0.4212134778499603, |
|
"eval_precision": 0.775, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.9253, |
|
"eval_samples_per_second": 32.203, |
|
"eval_steps_per_second": 1.039, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 21.0625, |
|
"grad_norm": 78.05635070800781, |
|
"learning_rate": 3.2152777777777784e-05, |
|
"loss": 0.3589, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 21.125, |
|
"grad_norm": 114.2430191040039, |
|
"learning_rate": 3.208333333333334e-05, |
|
"loss": 0.6472, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 21.1875, |
|
"grad_norm": 74.19608306884766, |
|
"learning_rate": 3.2013888888888886e-05, |
|
"loss": 0.4214, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"grad_norm": 75.80211639404297, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.384, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 21.3125, |
|
"grad_norm": 43.8707389831543, |
|
"learning_rate": 3.1875e-05, |
|
"loss": 0.3883, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 21.375, |
|
"grad_norm": 145.6295166015625, |
|
"learning_rate": 3.180555555555556e-05, |
|
"loss": 0.392, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 21.4375, |
|
"grad_norm": 41.334449768066406, |
|
"learning_rate": 3.173611111111111e-05, |
|
"loss": 0.3186, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"grad_norm": 114.87351989746094, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.5843, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 21.5625, |
|
"grad_norm": 38.9747200012207, |
|
"learning_rate": 3.159722222222222e-05, |
|
"loss": 0.2425, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 21.625, |
|
"grad_norm": 60.7866096496582, |
|
"learning_rate": 3.1527777777777775e-05, |
|
"loss": 0.3217, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 21.6875, |
|
"grad_norm": 106.74617004394531, |
|
"learning_rate": 3.145833333333334e-05, |
|
"loss": 0.504, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 21.75, |
|
"grad_norm": 135.9730224609375, |
|
"learning_rate": 3.138888888888889e-05, |
|
"loss": 0.3214, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 21.8125, |
|
"grad_norm": 110.94549560546875, |
|
"learning_rate": 3.1319444444444446e-05, |
|
"loss": 0.3475, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"grad_norm": 96.92147064208984, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.5358, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 21.9375, |
|
"grad_norm": 71.57472229003906, |
|
"learning_rate": 3.1180555555555555e-05, |
|
"loss": 0.4589, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 164.44607543945312, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.5776, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_auc": 0.9269957983193277, |
|
"eval_f1": 0.8529411764705882, |
|
"eval_loss": 0.39069586992263794, |
|
"eval_precision": 0.8529411764705882, |
|
"eval_recall": 0.8529411764705882, |
|
"eval_runtime": 1.9366, |
|
"eval_samples_per_second": 32.015, |
|
"eval_steps_per_second": 1.033, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 22.0625, |
|
"grad_norm": 118.7387924194336, |
|
"learning_rate": 3.104166666666667e-05, |
|
"loss": 0.4948, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 22.125, |
|
"grad_norm": 98.81047821044922, |
|
"learning_rate": 3.0972222222222226e-05, |
|
"loss": 0.3103, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 22.1875, |
|
"grad_norm": 26.512624740600586, |
|
"learning_rate": 3.090277777777778e-05, |
|
"loss": 0.2216, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"grad_norm": 53.15692138671875, |
|
"learning_rate": 3.0833333333333335e-05, |
|
"loss": 0.4647, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 22.3125, |
|
"grad_norm": 78.6286392211914, |
|
"learning_rate": 3.076388888888889e-05, |
|
"loss": 0.365, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 22.375, |
|
"grad_norm": 55.01898193359375, |
|
"learning_rate": 3.069444444444445e-05, |
|
"loss": 0.3692, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 22.4375, |
|
"grad_norm": 39.68163299560547, |
|
"learning_rate": 3.0625000000000006e-05, |
|
"loss": 0.2894, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 120.19679260253906, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.4181, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 22.5625, |
|
"grad_norm": 68.91941833496094, |
|
"learning_rate": 3.0486111111111115e-05, |
|
"loss": 0.5036, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 22.625, |
|
"grad_norm": 72.7096176147461, |
|
"learning_rate": 3.0416666666666666e-05, |
|
"loss": 0.5384, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 22.6875, |
|
"grad_norm": 100.89299774169922, |
|
"learning_rate": 3.034722222222222e-05, |
|
"loss": 0.3722, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"grad_norm": 68.78106689453125, |
|
"learning_rate": 3.0277777777777776e-05, |
|
"loss": 0.55, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 22.8125, |
|
"grad_norm": 54.39569091796875, |
|
"learning_rate": 3.0208333333333334e-05, |
|
"loss": 0.4194, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 22.875, |
|
"grad_norm": 31.410484313964844, |
|
"learning_rate": 3.0138888888888888e-05, |
|
"loss": 0.2857, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 22.9375, |
|
"grad_norm": 57.578338623046875, |
|
"learning_rate": 3.0069444444444446e-05, |
|
"loss": 0.4926, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 151.53012084960938, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4875, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_auc": 0.926470588235294, |
|
"eval_f1": 0.8450704225352113, |
|
"eval_loss": 0.36559081077575684, |
|
"eval_precision": 0.8108108108108109, |
|
"eval_recall": 0.8823529411764706, |
|
"eval_runtime": 1.9594, |
|
"eval_samples_per_second": 31.642, |
|
"eval_steps_per_second": 1.021, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 23.0625, |
|
"grad_norm": 44.12751770019531, |
|
"learning_rate": 2.9930555555555555e-05, |
|
"loss": 0.3138, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 23.125, |
|
"grad_norm": 58.954830169677734, |
|
"learning_rate": 2.9861111111111113e-05, |
|
"loss": 0.3775, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 23.1875, |
|
"grad_norm": 26.135852813720703, |
|
"learning_rate": 2.9791666666666668e-05, |
|
"loss": 0.2591, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 23.25, |
|
"grad_norm": 62.40138626098633, |
|
"learning_rate": 2.9722222222222223e-05, |
|
"loss": 0.3695, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 23.3125, |
|
"grad_norm": 60.646183013916016, |
|
"learning_rate": 2.965277777777778e-05, |
|
"loss": 0.3589, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 23.375, |
|
"grad_norm": 88.36126708984375, |
|
"learning_rate": 2.9583333333333335e-05, |
|
"loss": 0.5168, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"grad_norm": 46.5753288269043, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.3464, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"grad_norm": 91.99535369873047, |
|
"learning_rate": 2.9444444444444448e-05, |
|
"loss": 0.4247, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 23.5625, |
|
"grad_norm": 49.523929595947266, |
|
"learning_rate": 2.9375000000000003e-05, |
|
"loss": 0.4344, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 23.625, |
|
"grad_norm": 54.50424575805664, |
|
"learning_rate": 2.9305555555555557e-05, |
|
"loss": 0.4092, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 23.6875, |
|
"grad_norm": 43.38228988647461, |
|
"learning_rate": 2.9236111111111115e-05, |
|
"loss": 0.2469, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"grad_norm": 48.993412017822266, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.3167, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 23.8125, |
|
"grad_norm": 67.48350524902344, |
|
"learning_rate": 2.9097222222222224e-05, |
|
"loss": 0.2573, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 23.875, |
|
"grad_norm": 57.00615692138672, |
|
"learning_rate": 2.9027777777777782e-05, |
|
"loss": 0.4354, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 23.9375, |
|
"grad_norm": 75.92513275146484, |
|
"learning_rate": 2.8958333333333337e-05, |
|
"loss": 0.4081, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 174.88877868652344, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.5516, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_auc": 0.9243697478991597, |
|
"eval_f1": 0.8333333333333334, |
|
"eval_loss": 0.3794221878051758, |
|
"eval_precision": 0.7894736842105263, |
|
"eval_recall": 0.8823529411764706, |
|
"eval_runtime": 1.9326, |
|
"eval_samples_per_second": 32.081, |
|
"eval_steps_per_second": 1.035, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 24.0625, |
|
"grad_norm": 63.44384002685547, |
|
"learning_rate": 2.8819444444444443e-05, |
|
"loss": 0.4828, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 24.125, |
|
"grad_norm": 56.574588775634766, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 0.5755, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 24.1875, |
|
"grad_norm": 45.03982925415039, |
|
"learning_rate": 2.8680555555555555e-05, |
|
"loss": 0.3155, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"grad_norm": 75.03779602050781, |
|
"learning_rate": 2.861111111111111e-05, |
|
"loss": 0.3325, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 24.3125, |
|
"grad_norm": 53.46479797363281, |
|
"learning_rate": 2.8541666666666668e-05, |
|
"loss": 0.3432, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 24.375, |
|
"grad_norm": 65.31179809570312, |
|
"learning_rate": 2.8472222222222223e-05, |
|
"loss": 0.4144, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 24.4375, |
|
"grad_norm": 93.96704864501953, |
|
"learning_rate": 2.8402777777777777e-05, |
|
"loss": 0.4689, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 24.5, |
|
"grad_norm": 53.54984664916992, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.4174, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 24.5625, |
|
"grad_norm": 84.127197265625, |
|
"learning_rate": 2.826388888888889e-05, |
|
"loss": 0.4033, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 24.625, |
|
"grad_norm": 100.4719009399414, |
|
"learning_rate": 2.8194444444444445e-05, |
|
"loss": 0.4023, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 24.6875, |
|
"grad_norm": 48.454376220703125, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 0.3631, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"grad_norm": 97.61786651611328, |
|
"learning_rate": 2.8055555555555557e-05, |
|
"loss": 0.4391, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 24.8125, |
|
"grad_norm": 90.77650451660156, |
|
"learning_rate": 2.7986111111111112e-05, |
|
"loss": 0.3198, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 24.875, |
|
"grad_norm": 56.34316635131836, |
|
"learning_rate": 2.791666666666667e-05, |
|
"loss": 0.3028, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 24.9375, |
|
"grad_norm": 56.22303009033203, |
|
"learning_rate": 2.7847222222222224e-05, |
|
"loss": 0.3363, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 94.18289947509766, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2376, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_auc": 0.9275210084033613, |
|
"eval_f1": 0.8421052631578947, |
|
"eval_loss": 0.38999098539352417, |
|
"eval_precision": 0.7619047619047619, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 1.9189, |
|
"eval_samples_per_second": 32.31, |
|
"eval_steps_per_second": 1.042, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 25.0625, |
|
"grad_norm": 37.627017974853516, |
|
"learning_rate": 2.7708333333333337e-05, |
|
"loss": 0.3413, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 25.125, |
|
"grad_norm": 30.828754425048828, |
|
"learning_rate": 2.7638888888888892e-05, |
|
"loss": 0.3494, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 25.1875, |
|
"grad_norm": 62.38092803955078, |
|
"learning_rate": 2.7569444444444446e-05, |
|
"loss": 0.4031, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"grad_norm": 61.06608581542969, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.4108, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 25.3125, |
|
"grad_norm": 48.06837844848633, |
|
"learning_rate": 2.743055555555556e-05, |
|
"loss": 0.3223, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 25.375, |
|
"grad_norm": 38.35547637939453, |
|
"learning_rate": 2.7361111111111114e-05, |
|
"loss": 0.5251, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 25.4375, |
|
"grad_norm": 89.24491119384766, |
|
"learning_rate": 2.7291666666666665e-05, |
|
"loss": 0.5164, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"grad_norm": 50.19062423706055, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 0.293, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 25.5625, |
|
"grad_norm": 75.44627380371094, |
|
"learning_rate": 2.7152777777777777e-05, |
|
"loss": 0.3971, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 25.625, |
|
"grad_norm": 42.66498565673828, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 0.2638, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 25.6875, |
|
"grad_norm": 46.950252532958984, |
|
"learning_rate": 2.701388888888889e-05, |
|
"loss": 0.4027, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"grad_norm": 35.579078674316406, |
|
"learning_rate": 2.6944444444444445e-05, |
|
"loss": 0.3861, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 25.8125, |
|
"grad_norm": 64.6947021484375, |
|
"learning_rate": 2.6875e-05, |
|
"loss": 0.3807, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 25.875, |
|
"grad_norm": 78.13638305664062, |
|
"learning_rate": 2.6805555555555557e-05, |
|
"loss": 0.4848, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 25.9375, |
|
"grad_norm": 52.792789459228516, |
|
"learning_rate": 2.6736111111111112e-05, |
|
"loss": 0.3578, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 153.54739379882812, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.275, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_auc": 0.9322478991596639, |
|
"eval_f1": 0.8450704225352113, |
|
"eval_loss": 0.3614147901535034, |
|
"eval_precision": 0.8108108108108109, |
|
"eval_recall": 0.8823529411764706, |
|
"eval_runtime": 1.923, |
|
"eval_samples_per_second": 32.24, |
|
"eval_steps_per_second": 1.04, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 26.0625, |
|
"grad_norm": 117.4349136352539, |
|
"learning_rate": 2.6597222222222225e-05, |
|
"loss": 0.3249, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 26.125, |
|
"grad_norm": 43.56279373168945, |
|
"learning_rate": 2.652777777777778e-05, |
|
"loss": 0.3057, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 26.1875, |
|
"grad_norm": 36.80549240112305, |
|
"learning_rate": 2.6458333333333334e-05, |
|
"loss": 0.2349, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"grad_norm": 50.33206558227539, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.3677, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 26.3125, |
|
"grad_norm": 90.7452163696289, |
|
"learning_rate": 2.6319444444444446e-05, |
|
"loss": 0.2694, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 26.375, |
|
"grad_norm": 79.48680877685547, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.5367, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 26.4375, |
|
"grad_norm": 76.93234252929688, |
|
"learning_rate": 2.618055555555556e-05, |
|
"loss": 0.3667, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"grad_norm": 74.08578491210938, |
|
"learning_rate": 2.6111111111111114e-05, |
|
"loss": 0.288, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 26.5625, |
|
"grad_norm": 77.3045654296875, |
|
"learning_rate": 2.604166666666667e-05, |
|
"loss": 0.4342, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 26.625, |
|
"grad_norm": 68.01839447021484, |
|
"learning_rate": 2.5972222222222226e-05, |
|
"loss": 0.3474, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 26.6875, |
|
"grad_norm": 19.478015899658203, |
|
"learning_rate": 2.590277777777778e-05, |
|
"loss": 0.2049, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 26.75, |
|
"grad_norm": 97.51011657714844, |
|
"learning_rate": 2.5833333333333336e-05, |
|
"loss": 0.6356, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 26.8125, |
|
"grad_norm": 69.35116577148438, |
|
"learning_rate": 2.5763888888888887e-05, |
|
"loss": 0.328, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 26.875, |
|
"grad_norm": 72.81779479980469, |
|
"learning_rate": 2.5694444444444445e-05, |
|
"loss": 0.5902, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 26.9375, |
|
"grad_norm": 43.3619499206543, |
|
"learning_rate": 2.5625e-05, |
|
"loss": 0.3038, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 253.28797912597656, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.5285, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_auc": 0.9359243697478992, |
|
"eval_f1": 0.8732394366197183, |
|
"eval_loss": 0.33258941769599915, |
|
"eval_precision": 0.8378378378378378, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.9261, |
|
"eval_samples_per_second": 32.19, |
|
"eval_steps_per_second": 1.038, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 27.0625, |
|
"grad_norm": 85.61891174316406, |
|
"learning_rate": 2.5486111111111112e-05, |
|
"loss": 0.4318, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 27.125, |
|
"grad_norm": 50.95888137817383, |
|
"learning_rate": 2.5416666666666667e-05, |
|
"loss": 0.4233, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 27.1875, |
|
"grad_norm": 92.60533142089844, |
|
"learning_rate": 2.534722222222222e-05, |
|
"loss": 0.7241, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 27.25, |
|
"grad_norm": 41.785282135009766, |
|
"learning_rate": 2.527777777777778e-05, |
|
"loss": 0.234, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 27.3125, |
|
"grad_norm": 78.1746826171875, |
|
"learning_rate": 2.5208333333333334e-05, |
|
"loss": 0.6551, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 27.375, |
|
"grad_norm": 111.29540252685547, |
|
"learning_rate": 2.513888888888889e-05, |
|
"loss": 0.6107, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 27.4375, |
|
"grad_norm": 27.14556884765625, |
|
"learning_rate": 2.5069444444444447e-05, |
|
"loss": 0.1846, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 56.8499870300293, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3405, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 27.5625, |
|
"grad_norm": 18.762271881103516, |
|
"learning_rate": 2.4930555555555556e-05, |
|
"loss": 0.1898, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 27.625, |
|
"grad_norm": 119.3090591430664, |
|
"learning_rate": 2.4861111111111114e-05, |
|
"loss": 0.4205, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 27.6875, |
|
"grad_norm": 36.69664001464844, |
|
"learning_rate": 2.479166666666667e-05, |
|
"loss": 0.2131, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"grad_norm": 67.54560852050781, |
|
"learning_rate": 2.4722222222222223e-05, |
|
"loss": 0.3711, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 27.8125, |
|
"grad_norm": 47.20517349243164, |
|
"learning_rate": 2.465277777777778e-05, |
|
"loss": 0.2076, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 27.875, |
|
"grad_norm": 54.34505081176758, |
|
"learning_rate": 2.4583333333333332e-05, |
|
"loss": 0.2704, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 27.9375, |
|
"grad_norm": 67.48538970947266, |
|
"learning_rate": 2.451388888888889e-05, |
|
"loss": 0.2752, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 142.4190216064453, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.4273, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8709677419354839, |
|
"eval_auc": 0.98109243697479, |
|
"eval_f1": 0.8888888888888888, |
|
"eval_loss": 0.2646819055080414, |
|
"eval_precision": 0.8421052631578947, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 1.9507, |
|
"eval_samples_per_second": 31.784, |
|
"eval_steps_per_second": 1.025, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 28.0625, |
|
"grad_norm": 75.59832763671875, |
|
"learning_rate": 2.4375e-05, |
|
"loss": 0.3276, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"grad_norm": 40.402252197265625, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.2804, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 28.1875, |
|
"grad_norm": 95.81023406982422, |
|
"learning_rate": 2.4236111111111112e-05, |
|
"loss": 0.4781, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 28.25, |
|
"grad_norm": 39.70941925048828, |
|
"learning_rate": 2.4166666666666667e-05, |
|
"loss": 0.1875, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 28.3125, |
|
"grad_norm": 40.53486251831055, |
|
"learning_rate": 2.4097222222222225e-05, |
|
"loss": 0.2517, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 28.375, |
|
"grad_norm": 56.1978759765625, |
|
"learning_rate": 2.402777777777778e-05, |
|
"loss": 0.1931, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 28.4375, |
|
"grad_norm": 111.22602081298828, |
|
"learning_rate": 2.3958333333333334e-05, |
|
"loss": 0.4768, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"grad_norm": 59.05950927734375, |
|
"learning_rate": 2.3888888888888892e-05, |
|
"loss": 0.6638, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 28.5625, |
|
"grad_norm": 64.51776885986328, |
|
"learning_rate": 2.3819444444444443e-05, |
|
"loss": 0.3341, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 28.625, |
|
"grad_norm": 67.53648376464844, |
|
"learning_rate": 2.375e-05, |
|
"loss": 0.4273, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 28.6875, |
|
"grad_norm": 78.49746704101562, |
|
"learning_rate": 2.3680555555555556e-05, |
|
"loss": 0.3185, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"grad_norm": 77.06168365478516, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.6245, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 28.8125, |
|
"grad_norm": 96.53307342529297, |
|
"learning_rate": 2.354166666666667e-05, |
|
"loss": 0.4567, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 28.875, |
|
"grad_norm": 111.6141586303711, |
|
"learning_rate": 2.3472222222222223e-05, |
|
"loss": 0.2718, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 28.9375, |
|
"grad_norm": 20.222061157226562, |
|
"learning_rate": 2.3402777777777778e-05, |
|
"loss": 0.1668, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 140.95428466796875, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.31, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_auc": 0.9396008403361344, |
|
"eval_f1": 0.8529411764705882, |
|
"eval_loss": 0.3340640664100647, |
|
"eval_precision": 0.8529411764705882, |
|
"eval_recall": 0.8529411764705882, |
|
"eval_runtime": 1.9401, |
|
"eval_samples_per_second": 31.958, |
|
"eval_steps_per_second": 1.031, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 29.0625, |
|
"grad_norm": 102.17808532714844, |
|
"learning_rate": 2.326388888888889e-05, |
|
"loss": 0.4793, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 29.125, |
|
"grad_norm": 194.98696899414062, |
|
"learning_rate": 2.3194444444444445e-05, |
|
"loss": 0.3459, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 29.1875, |
|
"grad_norm": 130.52801513671875, |
|
"learning_rate": 2.3125000000000003e-05, |
|
"loss": 0.5329, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"grad_norm": 42.69224548339844, |
|
"learning_rate": 2.3055555555555558e-05, |
|
"loss": 0.1876, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 29.3125, |
|
"grad_norm": 64.0096664428711, |
|
"learning_rate": 2.2986111111111112e-05, |
|
"loss": 0.2573, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 29.375, |
|
"grad_norm": 69.68462371826172, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 0.377, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 29.4375, |
|
"grad_norm": 66.27143096923828, |
|
"learning_rate": 2.284722222222222e-05, |
|
"loss": 0.3805, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"grad_norm": 33.939876556396484, |
|
"learning_rate": 2.277777777777778e-05, |
|
"loss": 0.3259, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 29.5625, |
|
"grad_norm": 58.171016693115234, |
|
"learning_rate": 2.2708333333333334e-05, |
|
"loss": 0.6124, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 29.625, |
|
"grad_norm": 62.77367401123047, |
|
"learning_rate": 2.263888888888889e-05, |
|
"loss": 0.3582, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 29.6875, |
|
"grad_norm": 57.211177825927734, |
|
"learning_rate": 2.2569444444444447e-05, |
|
"loss": 0.4051, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 29.75, |
|
"grad_norm": 98.49055480957031, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2648, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 29.8125, |
|
"grad_norm": 32.86750411987305, |
|
"learning_rate": 2.2430555555555556e-05, |
|
"loss": 0.24, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 29.875, |
|
"grad_norm": 62.225521087646484, |
|
"learning_rate": 2.2361111111111114e-05, |
|
"loss": 0.2557, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 29.9375, |
|
"grad_norm": 98.8927001953125, |
|
"learning_rate": 2.229166666666667e-05, |
|
"loss": 0.425, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 231.0799102783203, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.7769, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_auc": 0.9506302521008404, |
|
"eval_f1": 0.8732394366197183, |
|
"eval_loss": 0.2989482879638672, |
|
"eval_precision": 0.8378378378378378, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.9367, |
|
"eval_samples_per_second": 32.013, |
|
"eval_steps_per_second": 1.033, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 30.0625, |
|
"grad_norm": 97.84791564941406, |
|
"learning_rate": 2.2152777777777778e-05, |
|
"loss": 0.6821, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 30.125, |
|
"grad_norm": 97.56119537353516, |
|
"learning_rate": 2.2083333333333333e-05, |
|
"loss": 0.298, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 30.1875, |
|
"grad_norm": 102.7960205078125, |
|
"learning_rate": 2.201388888888889e-05, |
|
"loss": 0.4884, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"grad_norm": 73.4170150756836, |
|
"learning_rate": 2.1944444444444445e-05, |
|
"loss": 0.319, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 30.3125, |
|
"grad_norm": 157.68368530273438, |
|
"learning_rate": 2.1875e-05, |
|
"loss": 0.3973, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 30.375, |
|
"grad_norm": 55.67268371582031, |
|
"learning_rate": 2.1805555555555558e-05, |
|
"loss": 0.288, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 30.4375, |
|
"grad_norm": 71.78160858154297, |
|
"learning_rate": 2.1736111111111112e-05, |
|
"loss": 0.533, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"grad_norm": 128.58132934570312, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.32, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 30.5625, |
|
"grad_norm": 84.67550659179688, |
|
"learning_rate": 2.1597222222222225e-05, |
|
"loss": 0.3051, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 30.625, |
|
"grad_norm": 29.67072296142578, |
|
"learning_rate": 2.152777777777778e-05, |
|
"loss": 0.3061, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 30.6875, |
|
"grad_norm": 21.23928451538086, |
|
"learning_rate": 2.1458333333333334e-05, |
|
"loss": 0.1565, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"grad_norm": 75.22709655761719, |
|
"learning_rate": 2.138888888888889e-05, |
|
"loss": 0.3763, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 30.8125, |
|
"grad_norm": 68.2527084350586, |
|
"learning_rate": 2.1319444444444444e-05, |
|
"loss": 0.3484, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 30.875, |
|
"grad_norm": 93.65040588378906, |
|
"learning_rate": 2.125e-05, |
|
"loss": 0.5086, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 30.9375, |
|
"grad_norm": 91.93578338623047, |
|
"learning_rate": 2.1180555555555556e-05, |
|
"loss": 0.6098, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 396.6805114746094, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.4993, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8709677419354839, |
|
"eval_auc": 0.9632352941176471, |
|
"eval_f1": 0.8888888888888888, |
|
"eval_loss": 0.2791585624217987, |
|
"eval_precision": 0.8421052631578947, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 1.8964, |
|
"eval_samples_per_second": 32.694, |
|
"eval_steps_per_second": 1.055, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 31.0625, |
|
"grad_norm": 218.7271728515625, |
|
"learning_rate": 2.104166666666667e-05, |
|
"loss": 0.5967, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 31.125, |
|
"grad_norm": 85.2174072265625, |
|
"learning_rate": 2.0972222222222223e-05, |
|
"loss": 0.2821, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 31.1875, |
|
"grad_norm": 55.23520278930664, |
|
"learning_rate": 2.0902777777777778e-05, |
|
"loss": 0.1794, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"grad_norm": 58.78559112548828, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.2058, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 31.3125, |
|
"grad_norm": 100.98091888427734, |
|
"learning_rate": 2.076388888888889e-05, |
|
"loss": 0.287, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 31.375, |
|
"grad_norm": 48.73373031616211, |
|
"learning_rate": 2.0694444444444445e-05, |
|
"loss": 0.1958, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 31.4375, |
|
"grad_norm": 47.07331848144531, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 0.3669, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"grad_norm": 68.89096069335938, |
|
"learning_rate": 2.0555555555555555e-05, |
|
"loss": 0.2905, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 31.5625, |
|
"grad_norm": 51.24907302856445, |
|
"learning_rate": 2.0486111111111113e-05, |
|
"loss": 0.3512, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 31.625, |
|
"grad_norm": 73.44024658203125, |
|
"learning_rate": 2.0416666666666667e-05, |
|
"loss": 0.2269, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 31.6875, |
|
"grad_norm": 71.18965148925781, |
|
"learning_rate": 2.0347222222222222e-05, |
|
"loss": 0.3371, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 31.75, |
|
"grad_norm": 98.83439636230469, |
|
"learning_rate": 2.027777777777778e-05, |
|
"loss": 0.5882, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 31.8125, |
|
"grad_norm": 111.71282958984375, |
|
"learning_rate": 2.0208333333333334e-05, |
|
"loss": 0.3465, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 31.875, |
|
"grad_norm": 159.08973693847656, |
|
"learning_rate": 2.013888888888889e-05, |
|
"loss": 0.6081, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 31.9375, |
|
"grad_norm": 68.47103881835938, |
|
"learning_rate": 2.0069444444444447e-05, |
|
"loss": 0.3763, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 43.89309310913086, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2188, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8709677419354839, |
|
"eval_auc": 0.9663865546218487, |
|
"eval_f1": 0.8857142857142857, |
|
"eval_loss": 0.26347893476486206, |
|
"eval_precision": 0.8611111111111112, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.939, |
|
"eval_samples_per_second": 31.976, |
|
"eval_steps_per_second": 1.031, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 32.0625, |
|
"grad_norm": 46.497779846191406, |
|
"learning_rate": 1.9930555555555556e-05, |
|
"loss": 0.3708, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 32.125, |
|
"grad_norm": 167.9232940673828, |
|
"learning_rate": 1.986111111111111e-05, |
|
"loss": 0.4261, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 32.1875, |
|
"grad_norm": 61.328529357910156, |
|
"learning_rate": 1.9791666666666665e-05, |
|
"loss": 0.3391, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"grad_norm": 170.238037109375, |
|
"learning_rate": 1.9722222222222224e-05, |
|
"loss": 0.4698, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 32.3125, |
|
"grad_norm": 35.044010162353516, |
|
"learning_rate": 1.9652777777777778e-05, |
|
"loss": 0.1823, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 32.375, |
|
"grad_norm": 77.83676147460938, |
|
"learning_rate": 1.9583333333333333e-05, |
|
"loss": 0.3278, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 32.4375, |
|
"grad_norm": 46.95936965942383, |
|
"learning_rate": 1.951388888888889e-05, |
|
"loss": 0.3318, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"grad_norm": 149.09030151367188, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.3393, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 32.5625, |
|
"grad_norm": 67.16770935058594, |
|
"learning_rate": 1.9375e-05, |
|
"loss": 0.4492, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 32.625, |
|
"grad_norm": 55.898834228515625, |
|
"learning_rate": 1.9305555555555558e-05, |
|
"loss": 0.3798, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 32.6875, |
|
"grad_norm": 28.289344787597656, |
|
"learning_rate": 1.9236111111111113e-05, |
|
"loss": 0.1741, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"grad_norm": 86.767333984375, |
|
"learning_rate": 1.9166666666666667e-05, |
|
"loss": 0.3531, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 32.8125, |
|
"grad_norm": 53.152278900146484, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.1571, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 32.875, |
|
"grad_norm": 30.3239688873291, |
|
"learning_rate": 1.9027777777777776e-05, |
|
"loss": 0.2651, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 32.9375, |
|
"grad_norm": 83.10792541503906, |
|
"learning_rate": 1.8958333333333334e-05, |
|
"loss": 0.2756, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 55.581485748291016, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.1285, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8709677419354839, |
|
"eval_auc": 0.9705882352941176, |
|
"eval_f1": 0.8857142857142857, |
|
"eval_loss": 0.2574561536312103, |
|
"eval_precision": 0.8611111111111112, |
|
"eval_recall": 0.9117647058823529, |
|
"eval_runtime": 1.9165, |
|
"eval_samples_per_second": 32.351, |
|
"eval_steps_per_second": 1.044, |
|
"step": 528 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 15, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.2633566842967654e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|