|
{ |
|
"best_metric": 0.7614678899082569, |
|
"best_model_checkpoint": "swiftformer-xs-dmae-va-U-SF/checkpoint-271", |
|
"epoch": 36.12903225806452, |
|
"eval_steps": 500, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.3119266055045872, |
|
"eval_loss": 1.3887187242507935, |
|
"eval_runtime": 0.6715, |
|
"eval_samples_per_second": 162.322, |
|
"eval_steps_per_second": 5.957, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.4383, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.41284403669724773, |
|
"eval_loss": 1.344041347503662, |
|
"eval_runtime": 0.4343, |
|
"eval_samples_per_second": 250.972, |
|
"eval_steps_per_second": 9.21, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.3956, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.3761467889908257, |
|
"eval_loss": 1.3158775568008423, |
|
"eval_runtime": 0.5875, |
|
"eval_samples_per_second": 185.531, |
|
"eval_steps_per_second": 6.808, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 1.36, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3853211009174312, |
|
"eval_loss": 1.2906575202941895, |
|
"eval_runtime": 0.416, |
|
"eval_samples_per_second": 262.05, |
|
"eval_steps_per_second": 9.617, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.44036697247706424, |
|
"eval_loss": 1.2488025426864624, |
|
"eval_runtime": 0.4344, |
|
"eval_samples_per_second": 250.911, |
|
"eval_steps_per_second": 9.208, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.2912, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.4036697247706422, |
|
"eval_loss": 1.2129390239715576, |
|
"eval_runtime": 0.4322, |
|
"eval_samples_per_second": 252.176, |
|
"eval_steps_per_second": 9.254, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 1.2387, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.46788990825688076, |
|
"eval_loss": 1.1733514070510864, |
|
"eval_runtime": 0.4452, |
|
"eval_samples_per_second": 244.821, |
|
"eval_steps_per_second": 8.984, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 1.1607, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5137614678899083, |
|
"eval_loss": 1.143617868423462, |
|
"eval_runtime": 0.4224, |
|
"eval_samples_per_second": 258.05, |
|
"eval_steps_per_second": 9.47, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.4954128440366973, |
|
"eval_loss": 1.0990597009658813, |
|
"eval_runtime": 0.459, |
|
"eval_samples_per_second": 237.483, |
|
"eval_steps_per_second": 8.715, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.1224, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.5504587155963303, |
|
"eval_loss": 1.0478596687316895, |
|
"eval_runtime": 0.7035, |
|
"eval_samples_per_second": 154.95, |
|
"eval_steps_per_second": 5.686, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 1.0547, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.5963302752293578, |
|
"eval_loss": 0.9993337392807007, |
|
"eval_runtime": 0.4348, |
|
"eval_samples_per_second": 250.685, |
|
"eval_steps_per_second": 9.199, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 1.0137, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6146788990825688, |
|
"eval_loss": 0.9859614372253418, |
|
"eval_runtime": 0.5726, |
|
"eval_samples_per_second": 190.361, |
|
"eval_steps_per_second": 6.986, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.9652, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.6146788990825688, |
|
"eval_loss": 0.9697992205619812, |
|
"eval_runtime": 0.4313, |
|
"eval_samples_per_second": 252.708, |
|
"eval_steps_per_second": 9.274, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.6055045871559633, |
|
"eval_loss": 0.9519100785255432, |
|
"eval_runtime": 0.4407, |
|
"eval_samples_per_second": 247.344, |
|
"eval_steps_per_second": 9.077, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.9217, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.6055045871559633, |
|
"eval_loss": 0.9242026209831238, |
|
"eval_runtime": 0.4297, |
|
"eval_samples_per_second": 253.648, |
|
"eval_steps_per_second": 9.308, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.9122, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6146788990825688, |
|
"eval_loss": 0.9062366485595703, |
|
"eval_runtime": 0.4329, |
|
"eval_samples_per_second": 251.781, |
|
"eval_steps_per_second": 9.24, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.8763, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.6422018348623854, |
|
"eval_loss": 0.8872672915458679, |
|
"eval_runtime": 0.4224, |
|
"eval_samples_per_second": 258.055, |
|
"eval_steps_per_second": 9.47, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.6513761467889908, |
|
"eval_loss": 0.8477428555488586, |
|
"eval_runtime": 0.5523, |
|
"eval_samples_per_second": 197.365, |
|
"eval_steps_per_second": 7.243, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.8471, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.6513761467889908, |
|
"eval_loss": 0.8427405953407288, |
|
"eval_runtime": 0.6098, |
|
"eval_samples_per_second": 178.737, |
|
"eval_steps_per_second": 6.559, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.8331, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6880733944954128, |
|
"eval_loss": 0.8257479667663574, |
|
"eval_runtime": 0.438, |
|
"eval_samples_per_second": 248.852, |
|
"eval_steps_per_second": 9.132, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.8167, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.6880733944954128, |
|
"eval_loss": 0.8025383353233337, |
|
"eval_runtime": 0.4399, |
|
"eval_samples_per_second": 247.761, |
|
"eval_steps_per_second": 9.092, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.8022, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.6972477064220184, |
|
"eval_loss": 0.8010965585708618, |
|
"eval_runtime": 0.4311, |
|
"eval_samples_per_second": 252.864, |
|
"eval_steps_per_second": 9.279, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.6972477064220184, |
|
"eval_loss": 0.8078291416168213, |
|
"eval_runtime": 0.4412, |
|
"eval_samples_per_second": 247.041, |
|
"eval_steps_per_second": 9.066, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.7996, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7064220183486238, |
|
"eval_loss": 0.7920359969139099, |
|
"eval_runtime": 0.4625, |
|
"eval_samples_per_second": 235.682, |
|
"eval_steps_per_second": 8.649, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.7962, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.760365903377533, |
|
"eval_runtime": 0.4325, |
|
"eval_samples_per_second": 251.996, |
|
"eval_steps_per_second": 9.248, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.7268, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.6972477064220184, |
|
"eval_loss": 0.7596898674964905, |
|
"eval_runtime": 0.568, |
|
"eval_samples_per_second": 191.903, |
|
"eval_steps_per_second": 7.042, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.6972477064220184, |
|
"eval_loss": 0.7461942434310913, |
|
"eval_runtime": 0.4265, |
|
"eval_samples_per_second": 255.55, |
|
"eval_steps_per_second": 9.378, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.7477, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7064220183486238, |
|
"eval_loss": 0.7316110730171204, |
|
"eval_runtime": 0.6404, |
|
"eval_samples_per_second": 170.198, |
|
"eval_steps_per_second": 6.246, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.7411, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.7522935779816514, |
|
"eval_loss": 0.7275413870811462, |
|
"eval_runtime": 0.4398, |
|
"eval_samples_per_second": 247.819, |
|
"eval_steps_per_second": 9.094, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.7415, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.7210223078727722, |
|
"eval_runtime": 0.4316, |
|
"eval_samples_per_second": 252.526, |
|
"eval_steps_per_second": 9.267, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.7159, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.7247706422018348, |
|
"eval_loss": 0.727079451084137, |
|
"eval_runtime": 0.4278, |
|
"eval_samples_per_second": 254.812, |
|
"eval_steps_per_second": 9.351, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7431192660550459, |
|
"eval_loss": 0.700477659702301, |
|
"eval_runtime": 0.4579, |
|
"eval_samples_per_second": 238.037, |
|
"eval_steps_per_second": 8.735, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.7322, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.7431192660550459, |
|
"eval_loss": 0.7012345194816589, |
|
"eval_runtime": 0.4583, |
|
"eval_samples_per_second": 237.855, |
|
"eval_steps_per_second": 8.729, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.7124, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.7522935779816514, |
|
"eval_loss": 0.7052269577980042, |
|
"eval_runtime": 0.4302, |
|
"eval_samples_per_second": 253.362, |
|
"eval_steps_per_second": 9.298, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.7194, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.7614678899082569, |
|
"eval_loss": 0.6964432597160339, |
|
"eval_runtime": 0.6076, |
|
"eval_samples_per_second": 179.38, |
|
"eval_steps_per_second": 6.583, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7522935779816514, |
|
"eval_loss": 0.7007263898849487, |
|
"eval_runtime": 0.4321, |
|
"eval_samples_per_second": 252.256, |
|
"eval_steps_per_second": 9.257, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"learning_rate": 0.0, |
|
"loss": 0.6903, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"eval_accuracy": 0.7431192660550459, |
|
"eval_loss": 0.6986872553825378, |
|
"eval_runtime": 0.4293, |
|
"eval_samples_per_second": 253.895, |
|
"eval_steps_per_second": 9.317, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"step": 280, |
|
"total_flos": 9.686412043576934e+16, |
|
"train_loss": 0.9347471083913531, |
|
"train_runtime": 250.0799, |
|
"train_samples_per_second": 156.43, |
|
"train_steps_per_second": 1.12 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 280, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 9.686412043576934e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|