{
  "best_metric": 0.9044117647058824,
  "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-papsmear/checkpoint-360",
  "epoch": 46.15384615384615,
  "eval_steps": 500,
  "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9230769230769231,
      "eval_accuracy": 0.2426470588235294,
      "eval_loss": 1.7588815689086914,
      "eval_runtime": 30.041,
      "eval_samples_per_second": 4.527,
      "eval_steps_per_second": 0.166,
      "step": 9
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 1.042138695716858,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 1.7862,
      "step": 10
    },
    {
      "epoch": 1.9487179487179487,
      "eval_accuracy": 0.38235294117647056,
      "eval_loss": 1.58797025680542,
      "eval_runtime": 32.3153,
      "eval_samples_per_second": 4.209,
      "eval_steps_per_second": 0.155,
      "step": 19
    },
    {
      "epoch": 2.051282051282051,
      "grad_norm": 0.996979296207428,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 1.6727,
      "step": 20
    },
    {
      "epoch": 2.9743589743589745,
      "eval_accuracy": 0.4264705882352941,
      "eval_loss": 1.4212044477462769,
      "eval_runtime": 31.937,
      "eval_samples_per_second": 4.258,
      "eval_steps_per_second": 0.157,
      "step": 29
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 1.1018966436386108,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.5102,
      "step": 30
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5808823529411765,
      "eval_loss": 1.2241116762161255,
      "eval_runtime": 30.9919,
      "eval_samples_per_second": 4.388,
      "eval_steps_per_second": 0.161,
      "step": 39
    },
    {
      "epoch": 4.102564102564102,
      "grad_norm": 1.025856614112854,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.3247,
      "step": 40
    },
    {
      "epoch": 4.923076923076923,
      "eval_accuracy": 0.6102941176470589,
      "eval_loss": 1.0905669927597046,
      "eval_runtime": 31.2612,
      "eval_samples_per_second": 4.35,
      "eval_steps_per_second": 0.16,
      "step": 48
    },
    {
      "epoch": 5.128205128205128,
      "grad_norm": 3.468245029449463,
      "learning_rate": 4.938271604938271e-05,
      "loss": 1.1047,
      "step": 50
    },
    {
      "epoch": 5.948717948717949,
      "eval_accuracy": 0.6764705882352942,
      "eval_loss": 0.9746549129486084,
      "eval_runtime": 29.9981,
      "eval_samples_per_second": 4.534,
      "eval_steps_per_second": 0.167,
      "step": 58
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 1.6158350706100464,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.9405,
      "step": 60
    },
    {
      "epoch": 6.9743589743589745,
      "eval_accuracy": 0.7426470588235294,
      "eval_loss": 0.8744558691978455,
      "eval_runtime": 29.8899,
      "eval_samples_per_second": 4.55,
      "eval_steps_per_second": 0.167,
      "step": 68
    },
    {
      "epoch": 7.17948717948718,
      "grad_norm": 2.7250912189483643,
      "learning_rate": 4.691358024691358e-05,
      "loss": 0.823,
      "step": 70
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7426470588235294,
      "eval_loss": 0.7832698822021484,
      "eval_runtime": 29.3695,
      "eval_samples_per_second": 4.631,
      "eval_steps_per_second": 0.17,
      "step": 78
    },
    {
      "epoch": 8.205128205128204,
      "grad_norm": 1.1131880283355713,
      "learning_rate": 4.567901234567901e-05,
      "loss": 0.7244,
      "step": 80
    },
    {
      "epoch": 8.923076923076923,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.7159935235977173,
      "eval_runtime": 30.5505,
      "eval_samples_per_second": 4.452,
      "eval_steps_per_second": 0.164,
      "step": 87
    },
    {
      "epoch": 9.23076923076923,
      "grad_norm": 1.162032961845398,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.6367,
      "step": 90
    },
    {
      "epoch": 9.948717948717949,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.7327755093574524,
      "eval_runtime": 31.3159,
      "eval_samples_per_second": 4.343,
      "eval_steps_per_second": 0.16,
      "step": 97
    },
    {
      "epoch": 10.256410256410255,
      "grad_norm": 2.753892421722412,
      "learning_rate": 4.3209876543209875e-05,
      "loss": 0.5537,
      "step": 100
    },
    {
      "epoch": 10.974358974358974,
      "eval_accuracy": 0.7867647058823529,
      "eval_loss": 0.6572667956352234,
      "eval_runtime": 29.9075,
      "eval_samples_per_second": 4.547,
      "eval_steps_per_second": 0.167,
      "step": 107
    },
    {
      "epoch": 11.282051282051283,
      "grad_norm": 1.8267817497253418,
      "learning_rate": 4.197530864197531e-05,
      "loss": 0.484,
      "step": 110
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8088235294117647,
      "eval_loss": 0.5988054275512695,
      "eval_runtime": 29.8243,
      "eval_samples_per_second": 4.56,
      "eval_steps_per_second": 0.168,
      "step": 117
    },
    {
      "epoch": 12.307692307692308,
      "grad_norm": 3.6334152221679688,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.4642,
      "step": 120
    },
    {
      "epoch": 12.923076923076923,
      "eval_accuracy": 0.7941176470588235,
      "eval_loss": 0.626797080039978,
      "eval_runtime": 29.8057,
      "eval_samples_per_second": 4.563,
      "eval_steps_per_second": 0.168,
      "step": 126
    },
    {
      "epoch": 13.333333333333334,
      "grad_norm": 2.4481348991394043,
      "learning_rate": 3.950617283950617e-05,
      "loss": 0.4166,
      "step": 130
    },
    {
      "epoch": 13.948717948717949,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.6549181342124939,
      "eval_runtime": 29.9078,
      "eval_samples_per_second": 4.547,
      "eval_steps_per_second": 0.167,
      "step": 136
    },
    {
      "epoch": 14.35897435897436,
      "grad_norm": 2.9035937786102295,
      "learning_rate": 3.82716049382716e-05,
      "loss": 0.4106,
      "step": 140
    },
    {
      "epoch": 14.974358974358974,
      "eval_accuracy": 0.8529411764705882,
      "eval_loss": 0.5330095887184143,
      "eval_runtime": 29.4929,
      "eval_samples_per_second": 4.611,
      "eval_steps_per_second": 0.17,
      "step": 146
    },
    {
      "epoch": 15.384615384615385,
      "grad_norm": 3.0346601009368896,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.3947,
      "step": 150
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8382352941176471,
      "eval_loss": 0.5133553147315979,
      "eval_runtime": 29.8094,
      "eval_samples_per_second": 4.562,
      "eval_steps_per_second": 0.168,
      "step": 156
    },
    {
      "epoch": 16.41025641025641,
      "grad_norm": 2.665196657180786,
      "learning_rate": 3.580246913580247e-05,
      "loss": 0.3469,
      "step": 160
    },
    {
      "epoch": 16.923076923076923,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.5879342555999756,
      "eval_runtime": 29.7747,
      "eval_samples_per_second": 4.568,
      "eval_steps_per_second": 0.168,
      "step": 165
    },
    {
      "epoch": 17.435897435897434,
      "grad_norm": 4.382056713104248,
      "learning_rate": 3.45679012345679e-05,
      "loss": 0.3151,
      "step": 170
    },
    {
      "epoch": 17.94871794871795,
      "eval_accuracy": 0.8382352941176471,
      "eval_loss": 0.5682740211486816,
      "eval_runtime": 29.9811,
      "eval_samples_per_second": 4.536,
      "eval_steps_per_second": 0.167,
      "step": 175
    },
    {
      "epoch": 18.46153846153846,
      "grad_norm": 1.3831549882888794,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.2946,
      "step": 180
    },
    {
      "epoch": 18.974358974358974,
      "eval_accuracy": 0.8161764705882353,
      "eval_loss": 0.5382511615753174,
      "eval_runtime": 29.6021,
      "eval_samples_per_second": 4.594,
      "eval_steps_per_second": 0.169,
      "step": 185
    },
    {
      "epoch": 19.487179487179485,
      "grad_norm": 2.7299916744232178,
      "learning_rate": 3.209876543209876e-05,
      "loss": 0.2927,
      "step": 190
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8161764705882353,
      "eval_loss": 0.568187952041626,
      "eval_runtime": 29.6931,
      "eval_samples_per_second": 4.58,
      "eval_steps_per_second": 0.168,
      "step": 195
    },
    {
      "epoch": 20.51282051282051,
      "grad_norm": 2.4772286415100098,
      "learning_rate": 3.08641975308642e-05,
      "loss": 0.2879,
      "step": 200
    },
    {
      "epoch": 20.923076923076923,
      "eval_accuracy": 0.8602941176470589,
      "eval_loss": 0.4721927046775818,
      "eval_runtime": 29.6838,
      "eval_samples_per_second": 4.582,
      "eval_steps_per_second": 0.168,
      "step": 204
    },
    {
      "epoch": 21.53846153846154,
      "grad_norm": 1.078134536743164,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.2512,
      "step": 210
    },
    {
      "epoch": 21.94871794871795,
      "eval_accuracy": 0.8455882352941176,
      "eval_loss": 0.48056113719940186,
      "eval_runtime": 29.966,
      "eval_samples_per_second": 4.538,
      "eval_steps_per_second": 0.167,
      "step": 214
    },
    {
      "epoch": 22.564102564102566,
      "grad_norm": 1.4218604564666748,
      "learning_rate": 2.839506172839506e-05,
      "loss": 0.2633,
      "step": 220
    },
    {
      "epoch": 22.974358974358974,
      "eval_accuracy": 0.8455882352941176,
      "eval_loss": 0.4712737500667572,
      "eval_runtime": 30.7829,
      "eval_samples_per_second": 4.418,
      "eval_steps_per_second": 0.162,
      "step": 224
    },
    {
      "epoch": 23.58974358974359,
      "grad_norm": 1.4475338459014893,
      "learning_rate": 2.7160493827160493e-05,
      "loss": 0.2286,
      "step": 230
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.8382352941176471,
      "eval_loss": 0.5166775584220886,
      "eval_runtime": 29.6495,
      "eval_samples_per_second": 4.587,
      "eval_steps_per_second": 0.169,
      "step": 234
    },
    {
      "epoch": 24.615384615384617,
      "grad_norm": 2.2939038276672363,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.2265,
      "step": 240
    },
    {
      "epoch": 24.923076923076923,
      "eval_accuracy": 0.8823529411764706,
      "eval_loss": 0.3885728120803833,
      "eval_runtime": 29.8244,
      "eval_samples_per_second": 4.56,
      "eval_steps_per_second": 0.168,
      "step": 243
    },
    {
      "epoch": 25.641025641025642,
      "grad_norm": 2.014761209487915,
      "learning_rate": 2.4691358024691357e-05,
      "loss": 0.2107,
      "step": 250
    },
    {
      "epoch": 25.94871794871795,
      "eval_accuracy": 0.8676470588235294,
      "eval_loss": 0.4395664930343628,
      "eval_runtime": 30.087,
      "eval_samples_per_second": 4.52,
      "eval_steps_per_second": 0.166,
      "step": 253
    },
    {
      "epoch": 26.666666666666668,
      "grad_norm": 1.4246245622634888,
      "learning_rate": 2.345679012345679e-05,
      "loss": 0.2044,
      "step": 260
    },
    {
      "epoch": 26.974358974358974,
      "eval_accuracy": 0.8455882352941176,
      "eval_loss": 0.47336432337760925,
      "eval_runtime": 30.2689,
      "eval_samples_per_second": 4.493,
      "eval_steps_per_second": 0.165,
      "step": 263
    },
    {
      "epoch": 27.692307692307693,
      "grad_norm": 1.1730149984359741,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.1925,
      "step": 270
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8529411764705882,
      "eval_loss": 0.4605894684791565,
      "eval_runtime": 29.9687,
      "eval_samples_per_second": 4.538,
      "eval_steps_per_second": 0.167,
      "step": 273
    },
    {
      "epoch": 28.71794871794872,
      "grad_norm": 1.8061479330062866,
      "learning_rate": 2.0987654320987655e-05,
      "loss": 0.1866,
      "step": 280
    },
    {
      "epoch": 28.923076923076923,
      "eval_accuracy": 0.8308823529411765,
      "eval_loss": 0.506081223487854,
      "eval_runtime": 29.7747,
      "eval_samples_per_second": 4.568,
      "eval_steps_per_second": 0.168,
      "step": 282
    },
    {
      "epoch": 29.743589743589745,
      "grad_norm": 3.999681234359741,
      "learning_rate": 1.9753086419753087e-05,
      "loss": 0.1928,
      "step": 290
    },
    {
      "epoch": 29.94871794871795,
      "eval_accuracy": 0.8823529411764706,
      "eval_loss": 0.42022156715393066,
      "eval_runtime": 31.5903,
      "eval_samples_per_second": 4.305,
      "eval_steps_per_second": 0.158,
      "step": 292
    },
    {
      "epoch": 30.76923076923077,
      "grad_norm": 1.7130581140518188,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1907,
      "step": 300
    },
    {
      "epoch": 30.974358974358974,
      "eval_accuracy": 0.8308823529411765,
      "eval_loss": 0.5120359659194946,
      "eval_runtime": 29.2951,
      "eval_samples_per_second": 4.642,
      "eval_steps_per_second": 0.171,
      "step": 302
    },
    {
      "epoch": 31.794871794871796,
      "grad_norm": 2.6331541538238525,
      "learning_rate": 1.728395061728395e-05,
      "loss": 0.1631,
      "step": 310
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.8676470588235294,
      "eval_loss": 0.41645094752311707,
      "eval_runtime": 29.9412,
      "eval_samples_per_second": 4.542,
      "eval_steps_per_second": 0.167,
      "step": 312
    },
    {
      "epoch": 32.82051282051282,
      "grad_norm": 2.0035409927368164,
      "learning_rate": 1.604938271604938e-05,
      "loss": 0.1654,
      "step": 320
    },
    {
      "epoch": 32.92307692307692,
      "eval_accuracy": 0.8676470588235294,
      "eval_loss": 0.45997411012649536,
      "eval_runtime": 29.5665,
      "eval_samples_per_second": 4.6,
      "eval_steps_per_second": 0.169,
      "step": 321
    },
    {
      "epoch": 33.84615384615385,
      "grad_norm": 0.8273878693580627,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.154,
      "step": 330
    },
    {
      "epoch": 33.94871794871795,
      "eval_accuracy": 0.8970588235294118,
      "eval_loss": 0.3834398686885834,
      "eval_runtime": 29.8196,
      "eval_samples_per_second": 4.561,
      "eval_steps_per_second": 0.168,
      "step": 331
    },
    {
      "epoch": 34.87179487179487,
      "grad_norm": 1.8872778415679932,
      "learning_rate": 1.3580246913580247e-05,
      "loss": 0.1459,
      "step": 340
    },
    {
      "epoch": 34.97435897435897,
      "eval_accuracy": 0.8897058823529411,
      "eval_loss": 0.36863845586776733,
      "eval_runtime": 29.8029,
      "eval_samples_per_second": 4.563,
      "eval_steps_per_second": 0.168,
      "step": 341
    },
    {
      "epoch": 35.8974358974359,
      "grad_norm": 1.480739712715149,
      "learning_rate": 1.2345679012345678e-05,
      "loss": 0.1452,
      "step": 350
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8676470588235294,
      "eval_loss": 0.41742780804634094,
      "eval_runtime": 30.4904,
      "eval_samples_per_second": 4.46,
      "eval_steps_per_second": 0.164,
      "step": 351
    },
    {
      "epoch": 36.92307692307692,
      "grad_norm": 2.4121947288513184,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.1548,
      "step": 360
    },
    {
      "epoch": 36.92307692307692,
      "eval_accuracy": 0.9044117647058824,
      "eval_loss": 0.379115492105484,
      "eval_runtime": 31.2755,
      "eval_samples_per_second": 4.348,
      "eval_steps_per_second": 0.16,
      "step": 360
    },
    {
      "epoch": 37.94871794871795,
      "grad_norm": 1.7541086673736572,
      "learning_rate": 9.876543209876543e-06,
      "loss": 0.1395,
      "step": 370
    },
    {
      "epoch": 37.94871794871795,
      "eval_accuracy": 0.8529411764705882,
      "eval_loss": 0.4511679708957672,
      "eval_runtime": 29.0831,
      "eval_samples_per_second": 4.676,
      "eval_steps_per_second": 0.172,
      "step": 370
    },
    {
      "epoch": 38.97435897435897,
      "grad_norm": 1.2144207954406738,
      "learning_rate": 8.641975308641975e-06,
      "loss": 0.1333,
      "step": 380
    },
    {
      "epoch": 38.97435897435897,
      "eval_accuracy": 0.8897058823529411,
      "eval_loss": 0.37747910618782043,
      "eval_runtime": 29.5567,
      "eval_samples_per_second": 4.601,
      "eval_steps_per_second": 0.169,
      "step": 380
    },
    {
      "epoch": 40.0,
      "grad_norm": 1.966362714767456,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.1236,
      "step": 390
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.8970588235294118,
      "eval_loss": 0.3665925860404968,
      "eval_runtime": 29.5708,
      "eval_samples_per_second": 4.599,
      "eval_steps_per_second": 0.169,
      "step": 390
    },
    {
      "epoch": 40.92307692307692,
      "eval_accuracy": 0.8970588235294118,
      "eval_loss": 0.38919442892074585,
      "eval_runtime": 29.9522,
      "eval_samples_per_second": 4.541,
      "eval_steps_per_second": 0.167,
      "step": 399
    },
    {
      "epoch": 41.02564102564103,
      "grad_norm": 1.2910165786743164,
      "learning_rate": 6.172839506172839e-06,
      "loss": 0.1314,
      "step": 400
    },
    {
      "epoch": 41.94871794871795,
      "eval_accuracy": 0.8897058823529411,
      "eval_loss": 0.3831816613674164,
      "eval_runtime": 30.006,
      "eval_samples_per_second": 4.532,
      "eval_steps_per_second": 0.167,
      "step": 409
    },
    {
      "epoch": 42.05128205128205,
      "grad_norm": 1.5890743732452393,
      "learning_rate": 4.938271604938272e-06,
      "loss": 0.1322,
      "step": 410
    },
    {
      "epoch": 42.97435897435897,
      "eval_accuracy": 0.8823529411764706,
      "eval_loss": 0.39192765951156616,
      "eval_runtime": 30.5878,
      "eval_samples_per_second": 4.446,
      "eval_steps_per_second": 0.163,
      "step": 419
    },
    {
      "epoch": 43.07692307692308,
      "grad_norm": 0.9582741260528564,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.1156,
      "step": 420
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.8970588235294118,
      "eval_loss": 0.369939923286438,
      "eval_runtime": 30.3858,
      "eval_samples_per_second": 4.476,
      "eval_steps_per_second": 0.165,
      "step": 429
    },
    {
      "epoch": 44.1025641025641,
      "grad_norm": 2.247335910797119,
      "learning_rate": 2.469135802469136e-06,
      "loss": 0.1222,
      "step": 430
    },
    {
      "epoch": 44.92307692307692,
      "eval_accuracy": 0.8970588235294118,
      "eval_loss": 0.38276419043540955,
      "eval_runtime": 29.9911,
      "eval_samples_per_second": 4.535,
      "eval_steps_per_second": 0.167,
      "step": 438
    },
    {
      "epoch": 45.12820512820513,
      "grad_norm": 0.5052188038825989,
      "learning_rate": 1.234567901234568e-06,
      "loss": 0.1254,
      "step": 440
    },
    {
      "epoch": 45.94871794871795,
      "eval_accuracy": 0.8897058823529411,
      "eval_loss": 0.38526448607444763,
      "eval_runtime": 31.3146,
      "eval_samples_per_second": 4.343,
      "eval_steps_per_second": 0.16,
      "step": 448
    },
    {
      "epoch": 46.15384615384615,
      "grad_norm": 0.5415890216827393,
      "learning_rate": 0.0,
      "loss": 0.1129,
      "step": 450
    },
    {
      "epoch": 46.15384615384615,
      "eval_accuracy": 0.8897058823529411,
      "eval_loss": 0.38528940081596375,
      "eval_runtime": 30.9585,
      "eval_samples_per_second": 4.393,
      "eval_steps_per_second": 0.162,
      "step": 450
    },
    {
      "epoch": 46.15384615384615,
      "step": 450,
      "total_flos": 4.3781443993328026e+18,
      "train_loss": 0.4221388864517212,
      "train_runtime": 14616.4414,
      "train_samples_per_second": 4.187,
      "train_steps_per_second": 0.031
    }
  ],
  "logging_steps": 10,
  "max_steps": 450,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.3781443993328026e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}