{ "best_metric": 0.9044117647058824, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-papsmear/checkpoint-360", "epoch": 46.15384615384615, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.2426470588235294, "eval_loss": 1.7588815689086914, "eval_runtime": 30.041, "eval_samples_per_second": 4.527, "eval_steps_per_second": 0.166, "step": 9 }, { "epoch": 1.0256410256410255, "grad_norm": 1.042138695716858, "learning_rate": 1.1111111111111112e-05, "loss": 1.7862, "step": 10 }, { "epoch": 1.9487179487179487, "eval_accuracy": 0.38235294117647056, "eval_loss": 1.58797025680542, "eval_runtime": 32.3153, "eval_samples_per_second": 4.209, "eval_steps_per_second": 0.155, "step": 19 }, { "epoch": 2.051282051282051, "grad_norm": 0.996979296207428, "learning_rate": 2.2222222222222223e-05, "loss": 1.6727, "step": 20 }, { "epoch": 2.9743589743589745, "eval_accuracy": 0.4264705882352941, "eval_loss": 1.4212044477462769, "eval_runtime": 31.937, "eval_samples_per_second": 4.258, "eval_steps_per_second": 0.157, "step": 29 }, { "epoch": 3.076923076923077, "grad_norm": 1.1018966436386108, "learning_rate": 3.3333333333333335e-05, "loss": 1.5102, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.5808823529411765, "eval_loss": 1.2241116762161255, "eval_runtime": 30.9919, "eval_samples_per_second": 4.388, "eval_steps_per_second": 0.161, "step": 39 }, { "epoch": 4.102564102564102, "grad_norm": 1.025856614112854, "learning_rate": 4.4444444444444447e-05, "loss": 1.3247, "step": 40 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.6102941176470589, "eval_loss": 1.0905669927597046, "eval_runtime": 31.2612, "eval_samples_per_second": 4.35, "eval_steps_per_second": 0.16, "step": 48 }, { "epoch": 5.128205128205128, "grad_norm": 3.468245029449463, "learning_rate": 4.938271604938271e-05, "loss": 1.1047, "step": 50 }, { "epoch": 5.948717948717949, "eval_accuracy": 0.6764705882352942, "eval_loss": 0.9746549129486084, "eval_runtime": 29.9981, "eval_samples_per_second": 4.534, "eval_steps_per_second": 0.167, "step": 58 }, { "epoch": 6.153846153846154, "grad_norm": 1.6158350706100464, "learning_rate": 4.814814814814815e-05, "loss": 0.9405, "step": 60 }, { "epoch": 6.9743589743589745, "eval_accuracy": 0.7426470588235294, "eval_loss": 0.8744558691978455, "eval_runtime": 29.8899, "eval_samples_per_second": 4.55, "eval_steps_per_second": 0.167, "step": 68 }, { "epoch": 7.17948717948718, "grad_norm": 2.7250912189483643, "learning_rate": 4.691358024691358e-05, "loss": 0.823, "step": 70 }, { "epoch": 8.0, "eval_accuracy": 0.7426470588235294, "eval_loss": 0.7832698822021484, "eval_runtime": 29.3695, "eval_samples_per_second": 4.631, "eval_steps_per_second": 0.17, "step": 78 }, { "epoch": 8.205128205128204, "grad_norm": 1.1131880283355713, "learning_rate": 4.567901234567901e-05, "loss": 0.7244, "step": 80 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.7794117647058824, "eval_loss": 0.7159935235977173, "eval_runtime": 30.5505, "eval_samples_per_second": 4.452, "eval_steps_per_second": 0.164, "step": 87 }, { "epoch": 9.23076923076923, "grad_norm": 1.162032961845398, "learning_rate": 4.4444444444444447e-05, "loss": 0.6367, "step": 90 }, { "epoch": 9.948717948717949, "eval_accuracy": 0.7794117647058824, "eval_loss": 0.7327755093574524, "eval_runtime": 31.3159, "eval_samples_per_second": 4.343, "eval_steps_per_second": 0.16, "step": 97 }, { "epoch": 10.256410256410255, "grad_norm": 2.753892421722412, "learning_rate": 4.3209876543209875e-05, "loss": 0.5537, "step": 100 }, { "epoch": 10.974358974358974, "eval_accuracy": 0.7867647058823529, "eval_loss": 0.6572667956352234, "eval_runtime": 29.9075, "eval_samples_per_second": 4.547, "eval_steps_per_second": 0.167, "step": 107 }, { "epoch": 11.282051282051283, "grad_norm": 1.8267817497253418, "learning_rate": 4.197530864197531e-05, "loss": 0.484, "step": 110 }, { "epoch": 12.0, "eval_accuracy": 0.8088235294117647, "eval_loss": 0.5988054275512695, "eval_runtime": 29.8243, "eval_samples_per_second": 4.56, "eval_steps_per_second": 0.168, "step": 117 }, { "epoch": 12.307692307692308, "grad_norm": 3.6334152221679688, "learning_rate": 4.074074074074074e-05, "loss": 0.4642, "step": 120 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.7941176470588235, "eval_loss": 0.626797080039978, "eval_runtime": 29.8057, "eval_samples_per_second": 4.563, "eval_steps_per_second": 0.168, "step": 126 }, { "epoch": 13.333333333333334, "grad_norm": 2.4481348991394043, "learning_rate": 3.950617283950617e-05, "loss": 0.4166, "step": 130 }, { "epoch": 13.948717948717949, "eval_accuracy": 0.7794117647058824, "eval_loss": 0.6549181342124939, "eval_runtime": 29.9078, "eval_samples_per_second": 4.547, "eval_steps_per_second": 0.167, "step": 136 }, { "epoch": 14.35897435897436, "grad_norm": 2.9035937786102295, "learning_rate": 3.82716049382716e-05, "loss": 0.4106, "step": 140 }, { "epoch": 14.974358974358974, "eval_accuracy": 0.8529411764705882, "eval_loss": 0.5330095887184143, "eval_runtime": 29.4929, "eval_samples_per_second": 4.611, "eval_steps_per_second": 0.17, "step": 146 }, { "epoch": 15.384615384615385, "grad_norm": 3.0346601009368896, "learning_rate": 3.7037037037037037e-05, "loss": 0.3947, "step": 150 }, { "epoch": 16.0, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.5133553147315979, "eval_runtime": 29.8094, "eval_samples_per_second": 4.562, "eval_steps_per_second": 0.168, "step": 156 }, { "epoch": 16.41025641025641, "grad_norm": 2.665196657180786, "learning_rate": 3.580246913580247e-05, "loss": 0.3469, "step": 160 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.7794117647058824, "eval_loss": 0.5879342555999756, "eval_runtime": 29.7747, "eval_samples_per_second": 4.568, "eval_steps_per_second": 0.168, "step": 165 }, { "epoch": 17.435897435897434, "grad_norm": 4.382056713104248, "learning_rate": 3.45679012345679e-05, "loss": 0.3151, "step": 170 }, { "epoch": 17.94871794871795, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.5682740211486816, "eval_runtime": 29.9811, "eval_samples_per_second": 4.536, "eval_steps_per_second": 0.167, "step": 175 }, { "epoch": 18.46153846153846, "grad_norm": 1.3831549882888794, "learning_rate": 3.3333333333333335e-05, "loss": 0.2946, "step": 180 }, { "epoch": 18.974358974358974, "eval_accuracy": 0.8161764705882353, "eval_loss": 0.5382511615753174, "eval_runtime": 29.6021, "eval_samples_per_second": 4.594, "eval_steps_per_second": 0.169, "step": 185 }, { "epoch": 19.487179487179485, "grad_norm": 2.7299916744232178, "learning_rate": 3.209876543209876e-05, "loss": 0.2927, "step": 190 }, { "epoch": 20.0, "eval_accuracy": 0.8161764705882353, "eval_loss": 0.568187952041626, "eval_runtime": 29.6931, "eval_samples_per_second": 4.58, "eval_steps_per_second": 0.168, "step": 195 }, { "epoch": 20.51282051282051, "grad_norm": 2.4772286415100098, "learning_rate": 3.08641975308642e-05, "loss": 0.2879, "step": 200 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.8602941176470589, "eval_loss": 0.4721927046775818, "eval_runtime": 29.6838, "eval_samples_per_second": 4.582, "eval_steps_per_second": 0.168, "step": 204 }, { "epoch": 21.53846153846154, "grad_norm": 1.078134536743164, "learning_rate": 2.962962962962963e-05, "loss": 0.2512, "step": 210 }, { "epoch": 21.94871794871795, "eval_accuracy": 0.8455882352941176, "eval_loss": 0.48056113719940186, "eval_runtime": 29.966, "eval_samples_per_second": 4.538, "eval_steps_per_second": 0.167, "step": 214 }, { "epoch": 22.564102564102566, "grad_norm": 1.4218604564666748, "learning_rate": 2.839506172839506e-05, "loss": 0.2633, "step": 220 }, { "epoch": 22.974358974358974, "eval_accuracy": 0.8455882352941176, "eval_loss": 0.4712737500667572, "eval_runtime": 30.7829, "eval_samples_per_second": 4.418, "eval_steps_per_second": 0.162, "step": 224 }, { "epoch": 23.58974358974359, "grad_norm": 1.4475338459014893, "learning_rate": 2.7160493827160493e-05, "loss": 0.2286, "step": 230 }, { "epoch": 24.0, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.5166775584220886, "eval_runtime": 29.6495, "eval_samples_per_second": 4.587, "eval_steps_per_second": 0.169, "step": 234 }, { "epoch": 24.615384615384617, "grad_norm": 2.2939038276672363, "learning_rate": 2.5925925925925925e-05, "loss": 0.2265, "step": 240 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.8823529411764706, "eval_loss": 0.3885728120803833, "eval_runtime": 29.8244, "eval_samples_per_second": 4.56, "eval_steps_per_second": 0.168, "step": 243 }, { "epoch": 25.641025641025642, "grad_norm": 2.014761209487915, "learning_rate": 2.4691358024691357e-05, "loss": 0.2107, "step": 250 }, { "epoch": 25.94871794871795, "eval_accuracy": 0.8676470588235294, "eval_loss": 0.4395664930343628, "eval_runtime": 30.087, "eval_samples_per_second": 4.52, "eval_steps_per_second": 0.166, "step": 253 }, { "epoch": 26.666666666666668, "grad_norm": 1.4246245622634888, "learning_rate": 2.345679012345679e-05, "loss": 0.2044, "step": 260 }, { "epoch": 26.974358974358974, "eval_accuracy": 0.8455882352941176, "eval_loss": 0.47336432337760925, "eval_runtime": 30.2689, "eval_samples_per_second": 4.493, "eval_steps_per_second": 0.165, "step": 263 }, { "epoch": 27.692307692307693, "grad_norm": 1.1730149984359741, "learning_rate": 2.2222222222222223e-05, "loss": 0.1925, "step": 270 }, { "epoch": 28.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 0.4605894684791565, "eval_runtime": 29.9687, "eval_samples_per_second": 4.538, "eval_steps_per_second": 0.167, "step": 273 }, { "epoch": 28.71794871794872, "grad_norm": 1.8061479330062866, "learning_rate": 2.0987654320987655e-05, "loss": 0.1866, "step": 280 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.8308823529411765, "eval_loss": 0.506081223487854, "eval_runtime": 29.7747, "eval_samples_per_second": 4.568, "eval_steps_per_second": 0.168, "step": 282 }, { "epoch": 29.743589743589745, "grad_norm": 3.999681234359741, "learning_rate": 1.9753086419753087e-05, "loss": 0.1928, "step": 290 }, { "epoch": 29.94871794871795, "eval_accuracy": 0.8823529411764706, "eval_loss": 0.42022156715393066, "eval_runtime": 31.5903, "eval_samples_per_second": 4.305, "eval_steps_per_second": 0.158, "step": 292 }, { "epoch": 30.76923076923077, "grad_norm": 1.7130581140518188, "learning_rate": 1.8518518518518518e-05, "loss": 0.1907, "step": 300 }, { "epoch": 30.974358974358974, "eval_accuracy": 0.8308823529411765, "eval_loss": 0.5120359659194946, "eval_runtime": 29.2951, "eval_samples_per_second": 4.642, "eval_steps_per_second": 0.171, "step": 302 }, { "epoch": 31.794871794871796, "grad_norm": 2.6331541538238525, "learning_rate": 1.728395061728395e-05, "loss": 0.1631, "step": 310 }, { "epoch": 32.0, "eval_accuracy": 0.8676470588235294, "eval_loss": 0.41645094752311707, "eval_runtime": 29.9412, "eval_samples_per_second": 4.542, "eval_steps_per_second": 0.167, "step": 312 }, { "epoch": 32.82051282051282, "grad_norm": 2.0035409927368164, "learning_rate": 1.604938271604938e-05, "loss": 0.1654, "step": 320 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.8676470588235294, "eval_loss": 0.45997411012649536, "eval_runtime": 29.5665, "eval_samples_per_second": 4.6, "eval_steps_per_second": 0.169, "step": 321 }, { "epoch": 33.84615384615385, "grad_norm": 0.8273878693580627, "learning_rate": 1.4814814814814815e-05, "loss": 0.154, "step": 330 }, { "epoch": 33.94871794871795, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.3834398686885834, "eval_runtime": 29.8196, "eval_samples_per_second": 4.561, "eval_steps_per_second": 0.168, "step": 331 }, { "epoch": 34.87179487179487, "grad_norm": 1.8872778415679932, "learning_rate": 1.3580246913580247e-05, "loss": 0.1459, "step": 340 }, { "epoch": 34.97435897435897, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.36863845586776733, "eval_runtime": 29.8029, "eval_samples_per_second": 4.563, "eval_steps_per_second": 0.168, "step": 341 }, { "epoch": 35.8974358974359, "grad_norm": 1.480739712715149, "learning_rate": 1.2345679012345678e-05, "loss": 0.1452, "step": 350 }, { "epoch": 36.0, "eval_accuracy": 0.8676470588235294, "eval_loss": 0.41742780804634094, "eval_runtime": 30.4904, "eval_samples_per_second": 4.46, "eval_steps_per_second": 0.164, "step": 351 }, { "epoch": 36.92307692307692, "grad_norm": 2.4121947288513184, "learning_rate": 1.1111111111111112e-05, "loss": 0.1548, "step": 360 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.379115492105484, "eval_runtime": 31.2755, "eval_samples_per_second": 4.348, "eval_steps_per_second": 0.16, "step": 360 }, { "epoch": 37.94871794871795, "grad_norm": 1.7541086673736572, "learning_rate": 9.876543209876543e-06, "loss": 0.1395, "step": 370 }, { "epoch": 37.94871794871795, "eval_accuracy": 0.8529411764705882, "eval_loss": 0.4511679708957672, "eval_runtime": 29.0831, "eval_samples_per_second": 4.676, "eval_steps_per_second": 0.172, "step": 370 }, { "epoch": 38.97435897435897, "grad_norm": 1.2144207954406738, "learning_rate": 8.641975308641975e-06, "loss": 0.1333, "step": 380 }, { "epoch": 38.97435897435897, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.37747910618782043, "eval_runtime": 29.5567, "eval_samples_per_second": 4.601, "eval_steps_per_second": 0.169, "step": 380 }, { "epoch": 40.0, "grad_norm": 1.966362714767456, "learning_rate": 7.4074074074074075e-06, "loss": 0.1236, "step": 390 }, { "epoch": 40.0, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.3665925860404968, "eval_runtime": 29.5708, "eval_samples_per_second": 4.599, "eval_steps_per_second": 0.169, "step": 390 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.38919442892074585, "eval_runtime": 29.9522, "eval_samples_per_second": 4.541, "eval_steps_per_second": 0.167, "step": 399 }, { "epoch": 41.02564102564103, "grad_norm": 1.2910165786743164, "learning_rate": 6.172839506172839e-06, "loss": 0.1314, "step": 400 }, { "epoch": 41.94871794871795, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.3831816613674164, "eval_runtime": 30.006, "eval_samples_per_second": 4.532, "eval_steps_per_second": 0.167, "step": 409 }, { "epoch": 42.05128205128205, "grad_norm": 1.5890743732452393, "learning_rate": 4.938271604938272e-06, "loss": 0.1322, "step": 410 }, { "epoch": 42.97435897435897, "eval_accuracy": 0.8823529411764706, "eval_loss": 0.39192765951156616, "eval_runtime": 30.5878, "eval_samples_per_second": 4.446, "eval_steps_per_second": 0.163, "step": 419 }, { "epoch": 43.07692307692308, "grad_norm": 0.9582741260528564, "learning_rate": 3.7037037037037037e-06, "loss": 0.1156, "step": 420 }, { "epoch": 44.0, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.369939923286438, "eval_runtime": 30.3858, "eval_samples_per_second": 4.476, "eval_steps_per_second": 0.165, "step": 429 }, { "epoch": 44.1025641025641, "grad_norm": 2.247335910797119, "learning_rate": 2.469135802469136e-06, "loss": 0.1222, "step": 430 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.38276419043540955, "eval_runtime": 29.9911, "eval_samples_per_second": 4.535, "eval_steps_per_second": 0.167, "step": 438 }, { "epoch": 45.12820512820513, "grad_norm": 0.5052188038825989, "learning_rate": 1.234567901234568e-06, "loss": 0.1254, "step": 440 }, { "epoch": 45.94871794871795, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.38526448607444763, "eval_runtime": 31.3146, "eval_samples_per_second": 4.343, "eval_steps_per_second": 0.16, "step": 448 }, { "epoch": 46.15384615384615, "grad_norm": 0.5415890216827393, "learning_rate": 0.0, "loss": 0.1129, "step": 450 }, { "epoch": 46.15384615384615, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.38528940081596375, "eval_runtime": 30.9585, "eval_samples_per_second": 4.393, "eval_steps_per_second": 0.162, "step": 450 }, { "epoch": 46.15384615384615, "step": 450, "total_flos": 4.3781443993328026e+18, "train_loss": 0.4221388864517212, "train_runtime": 14616.4414, "train_samples_per_second": 4.187, "train_steps_per_second": 0.031 } ], "logging_steps": 10, "max_steps": 450, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.3781443993328026e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }