|
{ |
|
"best_metric": 0.9655172413793104, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-da2-40/checkpoint-46", |
|
"epoch": 37.64705882352941, |
|
"eval_steps": 500, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.43103448275862066, |
|
"eval_loss": 1.29306960105896, |
|
"eval_runtime": 1.9553, |
|
"eval_samples_per_second": 29.663, |
|
"eval_steps_per_second": 1.023, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.5517241379310345, |
|
"eval_loss": 1.2023708820343018, |
|
"eval_runtime": 1.4877, |
|
"eval_samples_per_second": 38.987, |
|
"eval_steps_per_second": 1.344, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.2651, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.6551724137931034, |
|
"eval_loss": 0.9896121621131897, |
|
"eval_runtime": 1.3422, |
|
"eval_samples_per_second": 43.212, |
|
"eval_steps_per_second": 1.49, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.7972208261489868, |
|
"eval_runtime": 1.1676, |
|
"eval_samples_per_second": 49.674, |
|
"eval_steps_per_second": 1.713, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.6551724137931034, |
|
"eval_loss": 0.7336073517799377, |
|
"eval_runtime": 1.4618, |
|
"eval_samples_per_second": 39.678, |
|
"eval_steps_per_second": 1.368, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.7523, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_accuracy": 0.8103448275862069, |
|
"eval_loss": 0.5780647397041321, |
|
"eval_runtime": 1.4658, |
|
"eval_samples_per_second": 39.569, |
|
"eval_steps_per_second": 1.364, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"eval_accuracy": 0.8793103448275862, |
|
"eval_loss": 0.4911504089832306, |
|
"eval_runtime": 1.1824, |
|
"eval_samples_per_second": 49.051, |
|
"eval_steps_per_second": 1.691, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9137931034482759, |
|
"eval_loss": 0.41120994091033936, |
|
"eval_runtime": 1.1976, |
|
"eval_samples_per_second": 48.428, |
|
"eval_steps_per_second": 1.67, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.4209, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_accuracy": 0.9137931034482759, |
|
"eval_loss": 0.338276743888855, |
|
"eval_runtime": 1.1796, |
|
"eval_samples_per_second": 49.168, |
|
"eval_steps_per_second": 1.695, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.3129003047943115, |
|
"eval_runtime": 1.1761, |
|
"eval_samples_per_second": 49.316, |
|
"eval_steps_per_second": 1.701, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_accuracy": 0.9655172413793104, |
|
"eval_loss": 0.26601171493530273, |
|
"eval_runtime": 1.1866, |
|
"eval_samples_per_second": 48.88, |
|
"eval_steps_per_second": 1.686, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2647, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9310344827586207, |
|
"eval_loss": 0.3184232711791992, |
|
"eval_runtime": 1.1991, |
|
"eval_samples_per_second": 48.371, |
|
"eval_steps_per_second": 1.668, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"eval_accuracy": 0.9310344827586207, |
|
"eval_loss": 0.2870759963989258, |
|
"eval_runtime": 1.1806, |
|
"eval_samples_per_second": 49.126, |
|
"eval_steps_per_second": 1.694, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"eval_accuracy": 0.9137931034482759, |
|
"eval_loss": 0.27659285068511963, |
|
"eval_runtime": 1.4456, |
|
"eval_samples_per_second": 40.122, |
|
"eval_steps_per_second": 1.384, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.1743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_accuracy": 0.896551724137931, |
|
"eval_loss": 0.2726757526397705, |
|
"eval_runtime": 1.1844, |
|
"eval_samples_per_second": 48.97, |
|
"eval_steps_per_second": 1.689, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9310344827586207, |
|
"eval_loss": 0.2282419204711914, |
|
"eval_runtime": 1.198, |
|
"eval_samples_per_second": 48.414, |
|
"eval_steps_per_second": 1.669, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1511, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"eval_accuracy": 0.896551724137931, |
|
"eval_loss": 0.2891761064529419, |
|
"eval_runtime": 1.1918, |
|
"eval_samples_per_second": 48.667, |
|
"eval_steps_per_second": 1.678, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"eval_accuracy": 0.896551724137931, |
|
"eval_loss": 0.2482166588306427, |
|
"eval_runtime": 1.1922, |
|
"eval_samples_per_second": 48.65, |
|
"eval_steps_per_second": 1.678, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"eval_accuracy": 0.9310344827586207, |
|
"eval_loss": 0.2363000512123108, |
|
"eval_runtime": 1.4565, |
|
"eval_samples_per_second": 39.822, |
|
"eval_steps_per_second": 1.373, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.1253, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.16224975883960724, |
|
"eval_runtime": 1.5442, |
|
"eval_samples_per_second": 37.56, |
|
"eval_steps_per_second": 1.295, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 20.94, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.17531706392765045, |
|
"eval_runtime": 1.1991, |
|
"eval_samples_per_second": 48.37, |
|
"eval_steps_per_second": 1.668, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"eval_accuracy": 0.9655172413793104, |
|
"eval_loss": 0.1592673361301422, |
|
"eval_runtime": 1.186, |
|
"eval_samples_per_second": 48.905, |
|
"eval_steps_per_second": 1.686, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.087, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.1334347277879715, |
|
"eval_runtime": 1.1763, |
|
"eval_samples_per_second": 49.307, |
|
"eval_steps_per_second": 1.7, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.10879910737276077, |
|
"eval_runtime": 1.2016, |
|
"eval_samples_per_second": 48.269, |
|
"eval_steps_per_second": 1.664, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 24.94, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.11301546543836594, |
|
"eval_runtime": 1.2067, |
|
"eval_samples_per_second": 48.064, |
|
"eval_steps_per_second": 1.657, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.0856, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"eval_accuracy": 0.9137931034482759, |
|
"eval_loss": 0.1459188610315323, |
|
"eval_runtime": 1.1981, |
|
"eval_samples_per_second": 48.408, |
|
"eval_steps_per_second": 1.669, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"eval_accuracy": 0.9655172413793104, |
|
"eval_loss": 0.14451515674591064, |
|
"eval_runtime": 1.1942, |
|
"eval_samples_per_second": 48.568, |
|
"eval_steps_per_second": 1.675, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9655172413793104, |
|
"eval_loss": 0.1233600303530693, |
|
"eval_runtime": 1.1858, |
|
"eval_samples_per_second": 48.911, |
|
"eval_steps_per_second": 1.687, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.081, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 28.94, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.122359499335289, |
|
"eval_runtime": 1.1811, |
|
"eval_samples_per_second": 49.106, |
|
"eval_steps_per_second": 1.693, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 29.88, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.13033324480056763, |
|
"eval_runtime": 1.2613, |
|
"eval_samples_per_second": 45.985, |
|
"eval_steps_per_second": 1.586, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.13718828558921814, |
|
"eval_runtime": 1.5095, |
|
"eval_samples_per_second": 38.423, |
|
"eval_steps_per_second": 1.325, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.0554, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.14210258424282074, |
|
"eval_runtime": 1.1798, |
|
"eval_samples_per_second": 49.159, |
|
"eval_steps_per_second": 1.695, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.13068123161792755, |
|
"eval_runtime": 1.1849, |
|
"eval_samples_per_second": 48.949, |
|
"eval_steps_per_second": 1.688, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0783, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.12443631142377853, |
|
"eval_runtime": 1.19, |
|
"eval_samples_per_second": 48.739, |
|
"eval_steps_per_second": 1.681, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 34.82, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.11953343451023102, |
|
"eval_runtime": 1.2234, |
|
"eval_samples_per_second": 47.409, |
|
"eval_steps_per_second": 1.635, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.11707677692174911, |
|
"eval_runtime": 1.1823, |
|
"eval_samples_per_second": 49.059, |
|
"eval_steps_per_second": 1.692, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.0646, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 36.94, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.1165328100323677, |
|
"eval_runtime": 1.1871, |
|
"eval_samples_per_second": 48.857, |
|
"eval_steps_per_second": 1.685, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 37.65, |
|
"eval_accuracy": 0.9482758620689655, |
|
"eval_loss": 0.11630651354789734, |
|
"eval_runtime": 1.1868, |
|
"eval_samples_per_second": 48.872, |
|
"eval_steps_per_second": 1.685, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 37.65, |
|
"step": 160, |
|
"total_flos": 1.5182502752352338e+18, |
|
"train_loss": 0.2719826718792319, |
|
"train_runtime": 1073.8442, |
|
"train_samples_per_second": 19.37, |
|
"train_steps_per_second": 0.149 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 160, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 1.5182502752352338e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|