{ "best_metric": 0.8638888888888889, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-inaturalist/checkpoint-140", "epoch": 80.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "eval_accuracy": 0.03333333333333333, "eval_loss": 3.200693368911743, "eval_runtime": 3.9107, "eval_samples_per_second": 184.111, "eval_steps_per_second": 1.534, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.044444444444444446, "eval_loss": 3.188920021057129, "eval_runtime": 3.5624, "eval_samples_per_second": 202.11, "eval_steps_per_second": 1.684, "step": 5 }, { "epoch": 2.8, "eval_accuracy": 0.06388888888888888, "eval_loss": 3.1746575832366943, "eval_runtime": 3.358, "eval_samples_per_second": 214.412, "eval_steps_per_second": 1.787, "step": 7 }, { "epoch": 4.0, "grad_norm": 0.6863054633140564, "learning_rate": 2.5e-05, "loss": 3.1888, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.10972222222222222, "eval_loss": 3.144199848175049, "eval_runtime": 3.3224, "eval_samples_per_second": 216.708, "eval_steps_per_second": 1.806, "step": 10 }, { "epoch": 4.8, "eval_accuracy": 0.14583333333333334, "eval_loss": 3.1183247566223145, "eval_runtime": 3.3659, "eval_samples_per_second": 213.913, "eval_steps_per_second": 1.783, "step": 12 }, { "epoch": 6.0, "eval_accuracy": 0.21944444444444444, "eval_loss": 3.071033477783203, "eval_runtime": 3.1013, "eval_samples_per_second": 232.158, "eval_steps_per_second": 1.935, "step": 15 }, { "epoch": 6.8, "eval_accuracy": 0.30416666666666664, "eval_loss": 3.0330796241760254, "eval_runtime": 3.0337, "eval_samples_per_second": 237.333, "eval_steps_per_second": 1.978, "step": 17 }, { "epoch": 8.0, "grad_norm": 0.7356524467468262, "learning_rate": 5e-05, "loss": 3.0673, "step": 20 }, { "epoch": 8.0, "eval_accuracy": 0.4388888888888889, "eval_loss": 2.96268367767334, "eval_runtime": 3.2316, "eval_samples_per_second": 222.8, "eval_steps_per_second": 1.857, "step": 20 }, { "epoch": 8.8, "eval_accuracy": 0.49444444444444446, "eval_loss": 2.9109385013580322, "eval_runtime": 3.1222, "eval_samples_per_second": 230.605, "eval_steps_per_second": 1.922, "step": 22 }, { "epoch": 10.0, "eval_accuracy": 0.5763888888888888, "eval_loss": 2.8359875679016113, "eval_runtime": 3.0808, "eval_samples_per_second": 233.708, "eval_steps_per_second": 1.948, "step": 25 }, { "epoch": 10.8, "eval_accuracy": 0.6055555555555555, "eval_loss": 2.780921459197998, "eval_runtime": 3.0798, "eval_samples_per_second": 233.781, "eval_steps_per_second": 1.948, "step": 27 }, { "epoch": 12.0, "grad_norm": 0.76748126745224, "learning_rate": 4.722222222222222e-05, "loss": 2.8151, "step": 30 }, { "epoch": 12.0, "eval_accuracy": 0.6541666666666667, "eval_loss": 2.695770025253296, "eval_runtime": 3.3541, "eval_samples_per_second": 214.662, "eval_steps_per_second": 1.789, "step": 30 }, { "epoch": 12.8, "eval_accuracy": 0.6763888888888889, "eval_loss": 2.640125036239624, "eval_runtime": 3.1085, "eval_samples_per_second": 231.622, "eval_steps_per_second": 1.93, "step": 32 }, { "epoch": 14.0, "eval_accuracy": 0.6944444444444444, "eval_loss": 2.558335304260254, "eval_runtime": 3.0528, "eval_samples_per_second": 235.852, "eval_steps_per_second": 1.965, "step": 35 }, { "epoch": 14.8, "eval_accuracy": 0.7083333333333334, "eval_loss": 2.503415822982788, "eval_runtime": 3.1925, "eval_samples_per_second": 225.525, "eval_steps_per_second": 1.879, "step": 37 }, { "epoch": 16.0, "grad_norm": 0.8363860249519348, "learning_rate": 4.4444444444444447e-05, "loss": 2.5143, "step": 40 }, { "epoch": 16.0, "eval_accuracy": 0.7347222222222223, "eval_loss": 2.4201643466949463, "eval_runtime": 3.0856, "eval_samples_per_second": 233.339, "eval_steps_per_second": 1.944, "step": 40 }, { "epoch": 16.8, "eval_accuracy": 0.7375, "eval_loss": 2.3662188053131104, "eval_runtime": 3.0872, "eval_samples_per_second": 233.218, "eval_steps_per_second": 1.943, "step": 42 }, { "epoch": 18.0, "eval_accuracy": 0.7444444444444445, "eval_loss": 2.2883973121643066, "eval_runtime": 3.1262, "eval_samples_per_second": 230.314, "eval_steps_per_second": 1.919, "step": 45 }, { "epoch": 18.8, "eval_accuracy": 0.7569444444444444, "eval_loss": 2.237414598464966, "eval_runtime": 3.0834, "eval_samples_per_second": 233.51, "eval_steps_per_second": 1.946, "step": 47 }, { "epoch": 20.0, "grad_norm": 0.8517465591430664, "learning_rate": 4.166666666666667e-05, "loss": 2.2236, "step": 50 }, { "epoch": 20.0, "eval_accuracy": 0.7777777777777778, "eval_loss": 2.1632001399993896, "eval_runtime": 3.0769, "eval_samples_per_second": 234.001, "eval_steps_per_second": 1.95, "step": 50 }, { "epoch": 20.8, "eval_accuracy": 0.7833333333333333, "eval_loss": 2.1174519062042236, "eval_runtime": 3.0592, "eval_samples_per_second": 235.356, "eval_steps_per_second": 1.961, "step": 52 }, { "epoch": 22.0, "eval_accuracy": 0.7930555555555555, "eval_loss": 2.052760124206543, "eval_runtime": 3.21, "eval_samples_per_second": 224.301, "eval_steps_per_second": 1.869, "step": 55 }, { "epoch": 22.8, "eval_accuracy": 0.7958333333333333, "eval_loss": 2.009880304336548, "eval_runtime": 3.0534, "eval_samples_per_second": 235.801, "eval_steps_per_second": 1.965, "step": 57 }, { "epoch": 24.0, "grad_norm": 0.8587987422943115, "learning_rate": 3.888888888888889e-05, "loss": 1.9677, "step": 60 }, { "epoch": 24.0, "eval_accuracy": 0.8013888888888889, "eval_loss": 1.9488461017608643, "eval_runtime": 3.1017, "eval_samples_per_second": 232.129, "eval_steps_per_second": 1.934, "step": 60 }, { "epoch": 24.8, "eval_accuracy": 0.8097222222222222, "eval_loss": 1.9112929105758667, "eval_runtime": 3.0366, "eval_samples_per_second": 237.104, "eval_steps_per_second": 1.976, "step": 62 }, { "epoch": 26.0, "eval_accuracy": 0.8138888888888889, "eval_loss": 1.8581663370132446, "eval_runtime": 3.0592, "eval_samples_per_second": 235.359, "eval_steps_per_second": 1.961, "step": 65 }, { "epoch": 26.8, "eval_accuracy": 0.8138888888888889, "eval_loss": 1.8241873979568481, "eval_runtime": 3.2249, "eval_samples_per_second": 223.26, "eval_steps_per_second": 1.861, "step": 67 }, { "epoch": 28.0, "grad_norm": 0.8794375061988831, "learning_rate": 3.611111111111111e-05, "loss": 1.7467, "step": 70 }, { "epoch": 28.0, "eval_accuracy": 0.8111111111111111, "eval_loss": 1.7740373611450195, "eval_runtime": 3.1535, "eval_samples_per_second": 228.319, "eval_steps_per_second": 1.903, "step": 70 }, { "epoch": 28.8, "eval_accuracy": 0.8055555555555556, "eval_loss": 1.7457906007766724, "eval_runtime": 3.0886, "eval_samples_per_second": 233.114, "eval_steps_per_second": 1.943, "step": 72 }, { "epoch": 30.0, "eval_accuracy": 0.8180555555555555, "eval_loss": 1.7013169527053833, "eval_runtime": 3.0478, "eval_samples_per_second": 236.234, "eval_steps_per_second": 1.969, "step": 75 }, { "epoch": 30.8, "eval_accuracy": 0.8194444444444444, "eval_loss": 1.6714116334915161, "eval_runtime": 3.1668, "eval_samples_per_second": 227.361, "eval_steps_per_second": 1.895, "step": 77 }, { "epoch": 32.0, "grad_norm": 0.9110932946205139, "learning_rate": 3.3333333333333335e-05, "loss": 1.5765, "step": 80 }, { "epoch": 32.0, "eval_accuracy": 0.8263888888888888, "eval_loss": 1.631589412689209, "eval_runtime": 3.3174, "eval_samples_per_second": 217.038, "eval_steps_per_second": 1.809, "step": 80 }, { "epoch": 32.8, "eval_accuracy": 0.8236111111111111, "eval_loss": 1.6082607507705688, "eval_runtime": 3.107, "eval_samples_per_second": 231.734, "eval_steps_per_second": 1.931, "step": 82 }, { "epoch": 34.0, "eval_accuracy": 0.8291666666666667, "eval_loss": 1.5738186836242676, "eval_runtime": 3.1316, "eval_samples_per_second": 229.912, "eval_steps_per_second": 1.916, "step": 85 }, { "epoch": 34.8, "eval_accuracy": 0.8347222222222223, "eval_loss": 1.553105354309082, "eval_runtime": 3.2321, "eval_samples_per_second": 222.767, "eval_steps_per_second": 1.856, "step": 87 }, { "epoch": 36.0, "grad_norm": 0.9191176891326904, "learning_rate": 3.055555555555556e-05, "loss": 1.4431, "step": 90 }, { "epoch": 36.0, "eval_accuracy": 0.8430555555555556, "eval_loss": 1.5228244066238403, "eval_runtime": 3.0649, "eval_samples_per_second": 234.921, "eval_steps_per_second": 1.958, "step": 90 }, { "epoch": 36.8, "eval_accuracy": 0.8444444444444444, "eval_loss": 1.5046004056930542, "eval_runtime": 3.2047, "eval_samples_per_second": 224.669, "eval_steps_per_second": 1.872, "step": 92 }, { "epoch": 38.0, "eval_accuracy": 0.8472222222222222, "eval_loss": 1.4780092239379883, "eval_runtime": 3.0681, "eval_samples_per_second": 234.673, "eval_steps_per_second": 1.956, "step": 95 }, { "epoch": 38.8, "eval_accuracy": 0.8458333333333333, "eval_loss": 1.4607552289962769, "eval_runtime": 3.3287, "eval_samples_per_second": 216.303, "eval_steps_per_second": 1.803, "step": 97 }, { "epoch": 40.0, "grad_norm": 0.896637499332428, "learning_rate": 2.777777777777778e-05, "loss": 1.3049, "step": 100 }, { "epoch": 40.0, "eval_accuracy": 0.8458333333333333, "eval_loss": 1.435728907585144, "eval_runtime": 3.1142, "eval_samples_per_second": 231.201, "eval_steps_per_second": 1.927, "step": 100 }, { "epoch": 40.8, "eval_accuracy": 0.85, "eval_loss": 1.4187902212142944, "eval_runtime": 3.144, "eval_samples_per_second": 229.01, "eval_steps_per_second": 1.908, "step": 102 }, { "epoch": 42.0, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.3949499130249023, "eval_runtime": 3.0808, "eval_samples_per_second": 233.709, "eval_steps_per_second": 1.948, "step": 105 }, { "epoch": 42.8, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.3807573318481445, "eval_runtime": 3.1783, "eval_samples_per_second": 226.537, "eval_steps_per_second": 1.888, "step": 107 }, { "epoch": 44.0, "grad_norm": 0.8933643102645874, "learning_rate": 2.5e-05, "loss": 1.2312, "step": 110 }, { "epoch": 44.0, "eval_accuracy": 0.8458333333333333, "eval_loss": 1.3636168241500854, "eval_runtime": 3.356, "eval_samples_per_second": 214.543, "eval_steps_per_second": 1.788, "step": 110 }, { "epoch": 44.8, "eval_accuracy": 0.8486111111111111, "eval_loss": 1.3513400554656982, "eval_runtime": 3.2597, "eval_samples_per_second": 220.878, "eval_steps_per_second": 1.841, "step": 112 }, { "epoch": 46.0, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.3329037427902222, "eval_runtime": 3.0929, "eval_samples_per_second": 232.788, "eval_steps_per_second": 1.94, "step": 115 }, { "epoch": 46.8, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.319313883781433, "eval_runtime": 3.1007, "eval_samples_per_second": 232.203, "eval_steps_per_second": 1.935, "step": 117 }, { "epoch": 48.0, "grad_norm": 0.9017526507377625, "learning_rate": 2.2222222222222223e-05, "loss": 1.1368, "step": 120 }, { "epoch": 48.0, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.3025320768356323, "eval_runtime": 3.1234, "eval_samples_per_second": 230.52, "eval_steps_per_second": 1.921, "step": 120 }, { "epoch": 48.8, "eval_accuracy": 0.8541666666666666, "eval_loss": 1.2945308685302734, "eval_runtime": 3.0692, "eval_samples_per_second": 234.59, "eval_steps_per_second": 1.955, "step": 122 }, { "epoch": 50.0, "eval_accuracy": 0.8527777777777777, "eval_loss": 1.2819503545761108, "eval_runtime": 3.1162, "eval_samples_per_second": 231.052, "eval_steps_per_second": 1.925, "step": 125 }, { "epoch": 50.8, "eval_accuracy": 0.8569444444444444, "eval_loss": 1.2704639434814453, "eval_runtime": 3.2771, "eval_samples_per_second": 219.706, "eval_steps_per_second": 1.831, "step": 127 }, { "epoch": 52.0, "grad_norm": 0.8930371999740601, "learning_rate": 1.9444444444444445e-05, "loss": 1.0821, "step": 130 }, { "epoch": 52.0, "eval_accuracy": 0.8583333333333333, "eval_loss": 1.2615665197372437, "eval_runtime": 3.1151, "eval_samples_per_second": 231.134, "eval_steps_per_second": 1.926, "step": 130 }, { "epoch": 52.8, "eval_accuracy": 0.8555555555555555, "eval_loss": 1.2545220851898193, "eval_runtime": 3.0635, "eval_samples_per_second": 235.024, "eval_steps_per_second": 1.959, "step": 132 }, { "epoch": 54.0, "eval_accuracy": 0.8541666666666666, "eval_loss": 1.2422840595245361, "eval_runtime": 3.0988, "eval_samples_per_second": 232.348, "eval_steps_per_second": 1.936, "step": 135 }, { "epoch": 54.8, "eval_accuracy": 0.8597222222222223, "eval_loss": 1.233168125152588, "eval_runtime": 3.1982, "eval_samples_per_second": 225.127, "eval_steps_per_second": 1.876, "step": 137 }, { "epoch": 56.0, "grad_norm": 0.8568278551101685, "learning_rate": 1.6666666666666667e-05, "loss": 1.0232, "step": 140 }, { "epoch": 56.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.2210274934768677, "eval_runtime": 3.355, "eval_samples_per_second": 214.604, "eval_steps_per_second": 1.788, "step": 140 }, { "epoch": 56.8, "eval_accuracy": 0.8625, "eval_loss": 1.2160966396331787, "eval_runtime": 3.127, "eval_samples_per_second": 230.252, "eval_steps_per_second": 1.919, "step": 142 }, { "epoch": 58.0, "eval_accuracy": 0.8569444444444444, "eval_loss": 1.209418535232544, "eval_runtime": 3.1685, "eval_samples_per_second": 227.239, "eval_steps_per_second": 1.894, "step": 145 }, { "epoch": 58.8, "eval_accuracy": 0.8541666666666666, "eval_loss": 1.205717921257019, "eval_runtime": 3.1918, "eval_samples_per_second": 225.576, "eval_steps_per_second": 1.88, "step": 147 }, { "epoch": 60.0, "grad_norm": 0.8556333780288696, "learning_rate": 1.388888888888889e-05, "loss": 0.9814, "step": 150 }, { "epoch": 60.0, "eval_accuracy": 0.85, "eval_loss": 1.1972941160202026, "eval_runtime": 3.1515, "eval_samples_per_second": 228.465, "eval_steps_per_second": 1.904, "step": 150 }, { "epoch": 60.8, "eval_accuracy": 0.8486111111111111, "eval_loss": 1.1918764114379883, "eval_runtime": 3.349, "eval_samples_per_second": 214.987, "eval_steps_per_second": 1.792, "step": 152 }, { "epoch": 62.0, "eval_accuracy": 0.8625, "eval_loss": 1.1825212240219116, "eval_runtime": 3.1401, "eval_samples_per_second": 229.295, "eval_steps_per_second": 1.911, "step": 155 }, { "epoch": 62.8, "eval_accuracy": 0.8597222222222223, "eval_loss": 1.179900050163269, "eval_runtime": 3.239, "eval_samples_per_second": 222.293, "eval_steps_per_second": 1.852, "step": 157 }, { "epoch": 64.0, "grad_norm": 0.827712893486023, "learning_rate": 1.1111111111111112e-05, "loss": 0.9415, "step": 160 }, { "epoch": 64.0, "eval_accuracy": 0.8597222222222223, "eval_loss": 1.1716293096542358, "eval_runtime": 3.1517, "eval_samples_per_second": 228.447, "eval_steps_per_second": 1.904, "step": 160 }, { "epoch": 64.8, "eval_accuracy": 0.8625, "eval_loss": 1.166538119316101, "eval_runtime": 3.22, "eval_samples_per_second": 223.601, "eval_steps_per_second": 1.863, "step": 162 }, { "epoch": 66.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1611206531524658, "eval_runtime": 3.323, "eval_samples_per_second": 216.67, "eval_steps_per_second": 1.806, "step": 165 }, { "epoch": 66.8, "eval_accuracy": 0.8625, "eval_loss": 1.1600357294082642, "eval_runtime": 3.2253, "eval_samples_per_second": 223.233, "eval_steps_per_second": 1.86, "step": 167 }, { "epoch": 68.0, "grad_norm": 0.9616327285766602, "learning_rate": 8.333333333333334e-06, "loss": 0.9135, "step": 170 }, { "epoch": 68.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1577341556549072, "eval_runtime": 3.1095, "eval_samples_per_second": 231.548, "eval_steps_per_second": 1.93, "step": 170 }, { "epoch": 68.8, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1546900272369385, "eval_runtime": 3.2289, "eval_samples_per_second": 222.988, "eval_steps_per_second": 1.858, "step": 172 }, { "epoch": 70.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1493217945098877, "eval_runtime": 3.103, "eval_samples_per_second": 232.033, "eval_steps_per_second": 1.934, "step": 175 }, { "epoch": 70.8, "eval_accuracy": 0.8611111111111112, "eval_loss": 1.1463639736175537, "eval_runtime": 3.3191, "eval_samples_per_second": 216.926, "eval_steps_per_second": 1.808, "step": 177 }, { "epoch": 72.0, "grad_norm": 0.8919360637664795, "learning_rate": 5.555555555555556e-06, "loss": 0.8946, "step": 180 }, { "epoch": 72.0, "eval_accuracy": 0.8555555555555555, "eval_loss": 1.1423206329345703, "eval_runtime": 3.0106, "eval_samples_per_second": 239.152, "eval_steps_per_second": 1.993, "step": 180 }, { "epoch": 72.8, "eval_accuracy": 0.8611111111111112, "eval_loss": 1.1402053833007812, "eval_runtime": 3.4453, "eval_samples_per_second": 208.981, "eval_steps_per_second": 1.742, "step": 182 }, { "epoch": 74.0, "eval_accuracy": 0.8583333333333333, "eval_loss": 1.1375410556793213, "eval_runtime": 3.1143, "eval_samples_per_second": 231.188, "eval_steps_per_second": 1.927, "step": 185 }, { "epoch": 74.8, "eval_accuracy": 0.8597222222222223, "eval_loss": 1.1360384225845337, "eval_runtime": 3.2332, "eval_samples_per_second": 222.688, "eval_steps_per_second": 1.856, "step": 187 }, { "epoch": 76.0, "grad_norm": 0.9335024356842041, "learning_rate": 2.777777777777778e-06, "loss": 0.8866, "step": 190 }, { "epoch": 76.0, "eval_accuracy": 0.8625, "eval_loss": 1.134353756904602, "eval_runtime": 3.1331, "eval_samples_per_second": 229.808, "eval_steps_per_second": 1.915, "step": 190 }, { "epoch": 76.8, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1333543062210083, "eval_runtime": 3.2199, "eval_samples_per_second": 223.611, "eval_steps_per_second": 1.863, "step": 192 }, { "epoch": 78.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1323890686035156, "eval_runtime": 3.0815, "eval_samples_per_second": 233.653, "eval_steps_per_second": 1.947, "step": 195 }, { "epoch": 78.8, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1320444345474243, "eval_runtime": 3.2638, "eval_samples_per_second": 220.604, "eval_steps_per_second": 1.838, "step": 197 }, { "epoch": 80.0, "grad_norm": 0.8473469614982605, "learning_rate": 0.0, "loss": 0.8798, "step": 200 }, { "epoch": 80.0, "eval_accuracy": 0.8638888888888889, "eval_loss": 1.1318904161453247, "eval_runtime": 3.2096, "eval_samples_per_second": 224.325, "eval_steps_per_second": 1.869, "step": 200 }, { "epoch": 80.0, "step": 200, "total_flos": 7.440697863438336e+18, "train_loss": 1.5909362745285034, "train_runtime": 1036.0381, "train_samples_per_second": 115.826, "train_steps_per_second": 0.193 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.440697863438336e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }