|
{ |
|
"best_metric": 0.8165137614678899, |
|
"best_model_checkpoint": "swiftformer-xs-dmae-va-U-80/checkpoint-255", |
|
"epoch": 36.12903225806452, |
|
"eval_steps": 500, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.3486238532110092, |
|
"eval_loss": 1.3780734539031982, |
|
"eval_runtime": 0.4072, |
|
"eval_samples_per_second": 267.678, |
|
"eval_steps_per_second": 9.823, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.3901, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.3302752293577982, |
|
"eval_loss": 1.353822112083435, |
|
"eval_runtime": 0.5603, |
|
"eval_samples_per_second": 194.555, |
|
"eval_steps_per_second": 7.14, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.3561, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.3761467889908257, |
|
"eval_loss": 1.3090553283691406, |
|
"eval_runtime": 0.4075, |
|
"eval_samples_per_second": 267.483, |
|
"eval_steps_per_second": 9.816, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 1.2933, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.42201834862385323, |
|
"eval_loss": 1.2622544765472412, |
|
"eval_runtime": 0.5245, |
|
"eval_samples_per_second": 207.799, |
|
"eval_steps_per_second": 7.626, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.5137614678899083, |
|
"eval_loss": 1.206465721130371, |
|
"eval_runtime": 0.4369, |
|
"eval_samples_per_second": 249.483, |
|
"eval_steps_per_second": 9.155, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.2274, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.5412844036697247, |
|
"eval_loss": 1.1352602243423462, |
|
"eval_runtime": 0.4157, |
|
"eval_samples_per_second": 262.219, |
|
"eval_steps_per_second": 9.623, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 1.1686, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.6146788990825688, |
|
"eval_loss": 1.0853842496871948, |
|
"eval_runtime": 0.4114, |
|
"eval_samples_per_second": 264.933, |
|
"eval_steps_per_second": 9.722, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 1.097, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6330275229357798, |
|
"eval_loss": 1.0488643646240234, |
|
"eval_runtime": 0.4198, |
|
"eval_samples_per_second": 259.629, |
|
"eval_steps_per_second": 9.528, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.6605504587155964, |
|
"eval_loss": 1.0184704065322876, |
|
"eval_runtime": 0.4158, |
|
"eval_samples_per_second": 262.135, |
|
"eval_steps_per_second": 9.62, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.0349, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.6422018348623854, |
|
"eval_loss": 0.968228280544281, |
|
"eval_runtime": 0.4229, |
|
"eval_samples_per_second": 257.766, |
|
"eval_steps_per_second": 9.459, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 1.0161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.6055045871559633, |
|
"eval_loss": 0.9511241912841797, |
|
"eval_runtime": 0.5485, |
|
"eval_samples_per_second": 198.707, |
|
"eval_steps_per_second": 7.292, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 0.9633, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6605504587155964, |
|
"eval_loss": 0.9009455442428589, |
|
"eval_runtime": 0.42, |
|
"eval_samples_per_second": 259.508, |
|
"eval_steps_per_second": 9.523, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.939, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.6513761467889908, |
|
"eval_loss": 0.9054697155952454, |
|
"eval_runtime": 0.457, |
|
"eval_samples_per_second": 238.519, |
|
"eval_steps_per_second": 8.753, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.6697247706422018, |
|
"eval_loss": 0.8781045079231262, |
|
"eval_runtime": 0.4171, |
|
"eval_samples_per_second": 261.315, |
|
"eval_steps_per_second": 9.59, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.9036, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.8493882417678833, |
|
"eval_runtime": 0.4124, |
|
"eval_samples_per_second": 264.29, |
|
"eval_steps_per_second": 9.699, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.8687, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6788990825688074, |
|
"eval_loss": 0.850315272808075, |
|
"eval_runtime": 0.4239, |
|
"eval_samples_per_second": 257.162, |
|
"eval_steps_per_second": 9.437, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.8535, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.8164355158805847, |
|
"eval_runtime": 0.4095, |
|
"eval_samples_per_second": 266.199, |
|
"eval_steps_per_second": 9.769, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.7882992029190063, |
|
"eval_runtime": 0.4137, |
|
"eval_samples_per_second": 263.479, |
|
"eval_steps_per_second": 9.669, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.8306, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.7667210698127747, |
|
"eval_runtime": 0.4184, |
|
"eval_samples_per_second": 260.52, |
|
"eval_steps_per_second": 9.56, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.8047, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7522935779816514, |
|
"eval_loss": 0.7600359320640564, |
|
"eval_runtime": 0.5597, |
|
"eval_samples_per_second": 194.752, |
|
"eval_steps_per_second": 7.147, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.7735, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.7331238985061646, |
|
"eval_runtime": 0.4102, |
|
"eval_samples_per_second": 265.702, |
|
"eval_steps_per_second": 9.751, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.784, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.7522935779816514, |
|
"eval_loss": 0.7294867038726807, |
|
"eval_runtime": 0.4762, |
|
"eval_samples_per_second": 228.901, |
|
"eval_steps_per_second": 8.4, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.7431192660550459, |
|
"eval_loss": 0.7281246185302734, |
|
"eval_runtime": 0.4298, |
|
"eval_samples_per_second": 253.584, |
|
"eval_steps_per_second": 9.306, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.7596, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.7044692635536194, |
|
"eval_runtime": 0.4192, |
|
"eval_samples_per_second": 260.022, |
|
"eval_steps_per_second": 9.542, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.7609, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.7706422018348624, |
|
"eval_loss": 0.6915290951728821, |
|
"eval_runtime": 0.4079, |
|
"eval_samples_per_second": 267.227, |
|
"eval_steps_per_second": 9.806, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.7307, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.6969889998435974, |
|
"eval_runtime": 0.4136, |
|
"eval_samples_per_second": 263.538, |
|
"eval_steps_per_second": 9.671, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.6796095371246338, |
|
"eval_runtime": 0.4183, |
|
"eval_samples_per_second": 260.58, |
|
"eval_steps_per_second": 9.563, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.7263, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7706422018348624, |
|
"eval_loss": 0.6614620089530945, |
|
"eval_runtime": 0.4203, |
|
"eval_samples_per_second": 259.327, |
|
"eval_steps_per_second": 9.517, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.6933, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.7798165137614679, |
|
"eval_loss": 0.662841796875, |
|
"eval_runtime": 0.5565, |
|
"eval_samples_per_second": 195.855, |
|
"eval_steps_per_second": 7.187, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.6914, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.6596092581748962, |
|
"eval_runtime": 0.4186, |
|
"eval_samples_per_second": 260.4, |
|
"eval_steps_per_second": 9.556, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.7192, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.7981651376146789, |
|
"eval_loss": 0.6452586054801941, |
|
"eval_runtime": 0.6385, |
|
"eval_samples_per_second": 170.723, |
|
"eval_steps_per_second": 6.265, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7798165137614679, |
|
"eval_loss": 0.6569339632987976, |
|
"eval_runtime": 0.4062, |
|
"eval_samples_per_second": 268.371, |
|
"eval_steps_per_second": 9.848, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.6956, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.8165137614678899, |
|
"eval_loss": 0.6494290828704834, |
|
"eval_runtime": 0.4328, |
|
"eval_samples_per_second": 251.861, |
|
"eval_steps_per_second": 9.243, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.7037, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.6478080153465271, |
|
"eval_runtime": 0.4185, |
|
"eval_samples_per_second": 260.424, |
|
"eval_steps_per_second": 9.557, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.669, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.7798165137614679, |
|
"eval_loss": 0.641533613204956, |
|
"eval_runtime": 0.4124, |
|
"eval_samples_per_second": 264.279, |
|
"eval_steps_per_second": 9.698, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7889908256880734, |
|
"eval_loss": 0.6440734267234802, |
|
"eval_runtime": 0.4379, |
|
"eval_samples_per_second": 248.897, |
|
"eval_steps_per_second": 9.134, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"learning_rate": 0.0, |
|
"loss": 0.6715, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"eval_accuracy": 0.7798165137614679, |
|
"eval_loss": 0.6445415616035461, |
|
"eval_runtime": 0.5619, |
|
"eval_samples_per_second": 193.991, |
|
"eval_steps_per_second": 7.119, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"step": 280, |
|
"total_flos": 9.686412043576934e+16, |
|
"train_loss": 0.8973502908434187, |
|
"train_runtime": 243.9108, |
|
"train_samples_per_second": 160.386, |
|
"train_steps_per_second": 1.148 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 280, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 9.686412043576934e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|