|
{
  "best_metric": 0.8256880733944955,
  "best_model_checkpoint": "swiftformer-xs-dmae-va-U-80/checkpoint-480",
  "epoch": 72.25806451612904,
  "eval_steps": 500,
  "global_step": 560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9,
      "eval_accuracy": 0.28440366972477066,
      "eval_loss": 1.3862813711166382,
      "eval_runtime": 0.4815,
      "eval_samples_per_second": 226.357,
      "eval_steps_per_second": 8.307,
      "step": 7
    },
    {
      "epoch": 1.29,
      "learning_rate": 8.92857142857143e-06,
      "loss": 1.4158,
      "step": 10
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.3119266055045872,
      "eval_loss": 1.376009225845337,
      "eval_runtime": 0.4374,
      "eval_samples_per_second": 249.207,
      "eval_steps_per_second": 9.145,
      "step": 15
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.3853,
      "step": 20
    },
    {
      "epoch": 2.97,
      "eval_accuracy": 0.3853211009174312,
      "eval_loss": 1.3548262119293213,
      "eval_runtime": 0.4385,
      "eval_samples_per_second": 248.6,
      "eval_steps_per_second": 9.123,
      "step": 23
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.6785714285714288e-05,
      "loss": 1.3745,
      "step": 30
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.3394495412844037,
      "eval_loss": 1.3327373266220093,
      "eval_runtime": 0.4339,
      "eval_samples_per_second": 251.182,
      "eval_steps_per_second": 9.218,
      "step": 31
    },
    {
      "epoch": 4.9,
      "eval_accuracy": 0.42201834862385323,
      "eval_loss": 1.2938013076782227,
      "eval_runtime": 0.425,
      "eval_samples_per_second": 256.451,
      "eval_steps_per_second": 9.411,
      "step": 38
    },
    {
      "epoch": 5.16,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.3435,
      "step": 40
    },
    {
      "epoch": 5.94,
      "eval_accuracy": 0.46788990825688076,
      "eval_loss": 1.244996428489685,
      "eval_runtime": 0.4464,
      "eval_samples_per_second": 244.203,
      "eval_steps_per_second": 8.962,
      "step": 46
    },
    {
      "epoch": 6.45,
      "learning_rate": 4.464285714285715e-05,
      "loss": 1.2681,
      "step": 50
    },
    {
      "epoch": 6.97,
      "eval_accuracy": 0.5596330275229358,
      "eval_loss": 1.193323016166687,
      "eval_runtime": 0.4543,
      "eval_samples_per_second": 239.924,
      "eval_steps_per_second": 8.805,
      "step": 54
    },
    {
      "epoch": 7.74,
      "learning_rate": 4.960317460317461e-05,
      "loss": 1.1803,
      "step": 60
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.47706422018348627,
      "eval_loss": 1.1410021781921387,
      "eval_runtime": 0.5443,
      "eval_samples_per_second": 200.245,
      "eval_steps_per_second": 7.348,
      "step": 62
    },
    {
      "epoch": 8.9,
      "eval_accuracy": 0.5045871559633027,
      "eval_loss": 1.1014117002487183,
      "eval_runtime": 0.4243,
      "eval_samples_per_second": 256.877,
      "eval_steps_per_second": 9.427,
      "step": 69
    },
    {
      "epoch": 9.03,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 1.1277,
      "step": 70
    },
    {
      "epoch": 9.94,
      "eval_accuracy": 0.5321100917431193,
      "eval_loss": 1.0785263776779175,
      "eval_runtime": 0.5813,
      "eval_samples_per_second": 187.522,
      "eval_steps_per_second": 6.882,
      "step": 77
    },
    {
      "epoch": 10.32,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.0674,
      "step": 80
    },
    {
      "epoch": 10.97,
      "eval_accuracy": 0.5596330275229358,
      "eval_loss": 1.0439504384994507,
      "eval_runtime": 0.877,
      "eval_samples_per_second": 124.292,
      "eval_steps_per_second": 4.561,
      "step": 85
    },
    {
      "epoch": 11.61,
      "learning_rate": 4.662698412698413e-05,
      "loss": 1.0353,
      "step": 90
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5779816513761468,
      "eval_loss": 0.9961591362953186,
      "eval_runtime": 0.4276,
      "eval_samples_per_second": 254.912,
      "eval_steps_per_second": 9.355,
      "step": 93
    },
    {
      "epoch": 12.9,
      "learning_rate": 4.563492063492064e-05,
      "loss": 0.9859,
      "step": 100
    },
    {
      "epoch": 12.9,
      "eval_accuracy": 0.5871559633027523,
      "eval_loss": 0.9699747562408447,
      "eval_runtime": 0.488,
      "eval_samples_per_second": 223.356,
      "eval_steps_per_second": 8.197,
      "step": 100
    },
    {
      "epoch": 13.94,
      "eval_accuracy": 0.6422018348623854,
      "eval_loss": 0.9401906728744507,
      "eval_runtime": 0.426,
      "eval_samples_per_second": 255.868,
      "eval_steps_per_second": 9.39,
      "step": 108
    },
    {
      "epoch": 14.19,
      "learning_rate": 4.464285714285715e-05,
      "loss": 0.9397,
      "step": 110
    },
    {
      "epoch": 14.97,
      "eval_accuracy": 0.6238532110091743,
      "eval_loss": 0.9215461015701294,
      "eval_runtime": 0.595,
      "eval_samples_per_second": 183.202,
      "eval_steps_per_second": 6.723,
      "step": 116
    },
    {
      "epoch": 15.48,
      "learning_rate": 4.3650793650793655e-05,
      "loss": 0.8959,
      "step": 120
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6605504587155964,
      "eval_loss": 0.8744844794273376,
      "eval_runtime": 0.4502,
      "eval_samples_per_second": 242.121,
      "eval_steps_per_second": 8.885,
      "step": 124
    },
    {
      "epoch": 16.77,
      "learning_rate": 4.265873015873016e-05,
      "loss": 0.8663,
      "step": 130
    },
    {
      "epoch": 16.9,
      "eval_accuracy": 0.6697247706422018,
      "eval_loss": 0.8560922145843506,
      "eval_runtime": 0.4537,
      "eval_samples_per_second": 240.261,
      "eval_steps_per_second": 8.817,
      "step": 131
    },
    {
      "epoch": 17.94,
      "eval_accuracy": 0.6788990825688074,
      "eval_loss": 0.8181523084640503,
      "eval_runtime": 0.4306,
      "eval_samples_per_second": 253.131,
      "eval_steps_per_second": 9.289,
      "step": 139
    },
    {
      "epoch": 18.06,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.8405,
      "step": 140
    },
    {
      "epoch": 18.97,
      "eval_accuracy": 0.6513761467889908,
      "eval_loss": 0.8167656660079956,
      "eval_runtime": 0.4512,
      "eval_samples_per_second": 241.596,
      "eval_steps_per_second": 8.866,
      "step": 147
    },
    {
      "epoch": 19.35,
      "learning_rate": 4.067460317460318e-05,
      "loss": 0.8093,
      "step": 150
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.6788990825688074,
      "eval_loss": 0.8038831949234009,
      "eval_runtime": 0.434,
      "eval_samples_per_second": 251.165,
      "eval_steps_per_second": 9.217,
      "step": 155
    },
    {
      "epoch": 20.65,
      "learning_rate": 3.968253968253968e-05,
      "loss": 0.7396,
      "step": 160
    },
    {
      "epoch": 20.9,
      "eval_accuracy": 0.7064220183486238,
      "eval_loss": 0.7477715015411377,
      "eval_runtime": 0.44,
      "eval_samples_per_second": 247.725,
      "eval_steps_per_second": 9.091,
      "step": 162
    },
    {
      "epoch": 21.94,
      "learning_rate": 3.8690476190476195e-05,
      "loss": 0.7588,
      "step": 170
    },
    {
      "epoch": 21.94,
      "eval_accuracy": 0.6972477064220184,
      "eval_loss": 0.7236711978912354,
      "eval_runtime": 0.6128,
      "eval_samples_per_second": 177.862,
      "eval_steps_per_second": 6.527,
      "step": 170
    },
    {
      "epoch": 22.97,
      "eval_accuracy": 0.7155963302752294,
      "eval_loss": 0.7030771374702454,
      "eval_runtime": 0.4488,
      "eval_samples_per_second": 242.893,
      "eval_steps_per_second": 8.913,
      "step": 178
    },
    {
      "epoch": 23.23,
      "learning_rate": 3.76984126984127e-05,
      "loss": 0.7189,
      "step": 180
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.6972477064220184,
      "eval_loss": 0.6955855488777161,
      "eval_runtime": 0.4384,
      "eval_samples_per_second": 248.655,
      "eval_steps_per_second": 9.125,
      "step": 186
    },
    {
      "epoch": 24.52,
      "learning_rate": 3.6706349206349205e-05,
      "loss": 0.7111,
      "step": 190
    },
    {
      "epoch": 24.9,
      "eval_accuracy": 0.7247706422018348,
      "eval_loss": 0.6749173998832703,
      "eval_runtime": 0.4278,
      "eval_samples_per_second": 254.789,
      "eval_steps_per_second": 9.35,
      "step": 193
    },
    {
      "epoch": 25.81,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.6577,
      "step": 200
    },
    {
      "epoch": 25.94,
      "eval_accuracy": 0.6972477064220184,
      "eval_loss": 0.6757855415344238,
      "eval_runtime": 0.433,
      "eval_samples_per_second": 251.741,
      "eval_steps_per_second": 9.238,
      "step": 201
    },
    {
      "epoch": 26.97,
      "eval_accuracy": 0.7339449541284404,
      "eval_loss": 0.6429359316825867,
      "eval_runtime": 0.6106,
      "eval_samples_per_second": 178.522,
      "eval_steps_per_second": 6.551,
      "step": 209
    },
    {
      "epoch": 27.1,
      "learning_rate": 3.472222222222222e-05,
      "loss": 0.6681,
      "step": 210
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7064220183486238,
      "eval_loss": 0.6450981497764587,
      "eval_runtime": 0.4394,
      "eval_samples_per_second": 248.065,
      "eval_steps_per_second": 9.103,
      "step": 217
    },
    {
      "epoch": 28.39,
      "learning_rate": 3.3730158730158734e-05,
      "loss": 0.6238,
      "step": 220
    },
    {
      "epoch": 28.9,
      "eval_accuracy": 0.7339449541284404,
      "eval_loss": 0.6367726922035217,
      "eval_runtime": 0.61,
      "eval_samples_per_second": 178.699,
      "eval_steps_per_second": 6.558,
      "step": 224
    },
    {
      "epoch": 29.68,
      "learning_rate": 3.273809523809524e-05,
      "loss": 0.6136,
      "step": 230
    },
    {
      "epoch": 29.94,
      "eval_accuracy": 0.7706422018348624,
      "eval_loss": 0.6232722997665405,
      "eval_runtime": 0.4356,
      "eval_samples_per_second": 250.256,
      "eval_steps_per_second": 9.184,
      "step": 232
    },
    {
      "epoch": 30.97,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.5934,
      "step": 240
    },
    {
      "epoch": 30.97,
      "eval_accuracy": 0.7706422018348624,
      "eval_loss": 0.6160764098167419,
      "eval_runtime": 0.4496,
      "eval_samples_per_second": 242.42,
      "eval_steps_per_second": 8.896,
      "step": 240
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7431192660550459,
      "eval_loss": 0.626839816570282,
      "eval_runtime": 0.44,
      "eval_samples_per_second": 247.748,
      "eval_steps_per_second": 9.092,
      "step": 248
    },
    {
      "epoch": 32.26,
      "learning_rate": 3.075396825396826e-05,
      "loss": 0.5807,
      "step": 250
    },
    {
      "epoch": 32.9,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.5879014730453491,
      "eval_runtime": 0.449,
      "eval_samples_per_second": 242.778,
      "eval_steps_per_second": 8.909,
      "step": 255
    },
    {
      "epoch": 33.55,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 0.575,
      "step": 260
    },
    {
      "epoch": 33.94,
      "eval_accuracy": 0.7706422018348624,
      "eval_loss": 0.5772398710250854,
      "eval_runtime": 0.4322,
      "eval_samples_per_second": 252.185,
      "eval_steps_per_second": 9.254,
      "step": 263
    },
    {
      "epoch": 34.84,
      "learning_rate": 2.876984126984127e-05,
      "loss": 0.5409,
      "step": 270
    },
    {
      "epoch": 34.97,
      "eval_accuracy": 0.7798165137614679,
      "eval_loss": 0.5702607035636902,
      "eval_runtime": 0.4332,
      "eval_samples_per_second": 251.604,
      "eval_steps_per_second": 9.233,
      "step": 271
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 0.5603441596031189,
      "eval_runtime": 0.6241,
      "eval_samples_per_second": 174.639,
      "eval_steps_per_second": 6.409,
      "step": 279
    },
    {
      "epoch": 36.13,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.553,
      "step": 280
    },
    {
      "epoch": 36.9,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.5560410022735596,
      "eval_runtime": 0.4375,
      "eval_samples_per_second": 249.161,
      "eval_steps_per_second": 9.144,
      "step": 286
    },
    {
      "epoch": 37.42,
      "learning_rate": 2.6785714285714288e-05,
      "loss": 0.515,
      "step": 290
    },
    {
      "epoch": 37.94,
      "eval_accuracy": 0.7706422018348624,
      "eval_loss": 0.5639447569847107,
      "eval_runtime": 0.4367,
      "eval_samples_per_second": 249.625,
      "eval_steps_per_second": 9.161,
      "step": 294
    },
    {
      "epoch": 38.71,
      "learning_rate": 2.5793650793650796e-05,
      "loss": 0.5424,
      "step": 300
    },
    {
      "epoch": 38.97,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 0.5483418107032776,
      "eval_runtime": 0.4272,
      "eval_samples_per_second": 255.167,
      "eval_steps_per_second": 9.364,
      "step": 302
    },
    {
      "epoch": 40.0,
      "learning_rate": 2.4801587301587305e-05,
      "loss": 0.5193,
      "step": 310
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.7798165137614679,
      "eval_loss": 0.5505456924438477,
      "eval_runtime": 0.4357,
      "eval_samples_per_second": 250.151,
      "eval_steps_per_second": 9.18,
      "step": 310
    },
    {
      "epoch": 40.9,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.5322768092155457,
      "eval_runtime": 0.438,
      "eval_samples_per_second": 248.859,
      "eval_steps_per_second": 9.132,
      "step": 317
    },
    {
      "epoch": 41.29,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.5123,
      "step": 320
    },
    {
      "epoch": 41.94,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.5257070064544678,
      "eval_runtime": 0.4489,
      "eval_samples_per_second": 242.816,
      "eval_steps_per_second": 8.911,
      "step": 325
    },
    {
      "epoch": 42.58,
      "learning_rate": 2.281746031746032e-05,
      "loss": 0.4719,
      "step": 330
    },
    {
      "epoch": 42.97,
      "eval_accuracy": 0.7798165137614679,
      "eval_loss": 0.5269966125488281,
      "eval_runtime": 0.7214,
      "eval_samples_per_second": 151.1,
      "eval_steps_per_second": 5.545,
      "step": 333
    },
    {
      "epoch": 43.87,
      "learning_rate": 2.1825396825396827e-05,
      "loss": 0.4583,
      "step": 340
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.7706422018348624,
      "eval_loss": 0.5304660201072693,
      "eval_runtime": 0.4302,
      "eval_samples_per_second": 253.364,
      "eval_steps_per_second": 9.298,
      "step": 341
    },
    {
      "epoch": 44.9,
      "eval_accuracy": 0.7798165137614679,
      "eval_loss": 0.5281646251678467,
      "eval_runtime": 0.6252,
      "eval_samples_per_second": 174.336,
      "eval_steps_per_second": 6.398,
      "step": 348
    },
    {
      "epoch": 45.16,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.4568,
      "step": 350
    },
    {
      "epoch": 45.94,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 0.5177940130233765,
      "eval_runtime": 0.4569,
      "eval_samples_per_second": 238.568,
      "eval_steps_per_second": 8.755,
      "step": 356
    },
    {
      "epoch": 46.45,
      "learning_rate": 1.984126984126984e-05,
      "loss": 0.4717,
      "step": 360
    },
    {
      "epoch": 46.97,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.49453452229499817,
      "eval_runtime": 0.4248,
      "eval_samples_per_second": 256.572,
      "eval_steps_per_second": 9.415,
      "step": 364
    },
    {
      "epoch": 47.74,
      "learning_rate": 1.884920634920635e-05,
      "loss": 0.4587,
      "step": 370
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.4978266656398773,
      "eval_runtime": 0.4615,
      "eval_samples_per_second": 236.178,
      "eval_steps_per_second": 8.667,
      "step": 372
    },
    {
      "epoch": 48.9,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 0.4887874722480774,
      "eval_runtime": 0.4558,
      "eval_samples_per_second": 239.119,
      "eval_steps_per_second": 8.775,
      "step": 379
    },
    {
      "epoch": 49.03,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.4314,
      "step": 380
    },
    {
      "epoch": 49.94,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.48671984672546387,
      "eval_runtime": 0.4423,
      "eval_samples_per_second": 246.43,
      "eval_steps_per_second": 9.043,
      "step": 387
    },
    {
      "epoch": 50.32,
      "learning_rate": 1.6865079365079367e-05,
      "loss": 0.4389,
      "step": 390
    },
    {
      "epoch": 50.97,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 0.4739398658275604,
      "eval_runtime": 0.4484,
      "eval_samples_per_second": 243.061,
      "eval_steps_per_second": 8.92,
      "step": 395
    },
    {
      "epoch": 51.61,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 0.4115,
      "step": 400
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.4844360053539276,
      "eval_runtime": 0.6269,
      "eval_samples_per_second": 173.869,
      "eval_steps_per_second": 6.381,
      "step": 403
    },
    {
      "epoch": 52.9,
      "learning_rate": 1.4880952380952381e-05,
      "loss": 0.4323,
      "step": 410
    },
    {
      "epoch": 52.9,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.4818795323371887,
      "eval_runtime": 0.4584,
      "eval_samples_per_second": 237.796,
      "eval_steps_per_second": 8.726,
      "step": 410
    },
    {
      "epoch": 53.94,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.45617520809173584,
      "eval_runtime": 0.4517,
      "eval_samples_per_second": 241.332,
      "eval_steps_per_second": 8.856,
      "step": 418
    },
    {
      "epoch": 54.19,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.3855,
      "step": 420
    },
    {
      "epoch": 54.97,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.4639626145362854,
      "eval_runtime": 0.4274,
      "eval_samples_per_second": 255.051,
      "eval_steps_per_second": 9.36,
      "step": 426
    },
    {
      "epoch": 55.48,
      "learning_rate": 1.2896825396825398e-05,
      "loss": 0.4113,
      "step": 430
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.44737786054611206,
      "eval_runtime": 0.4453,
      "eval_samples_per_second": 244.764,
      "eval_steps_per_second": 8.982,
      "step": 434
    },
    {
      "epoch": 56.77,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.4282,
      "step": 440
    },
    {
      "epoch": 56.9,
      "eval_accuracy": 0.7981651376146789,
      "eval_loss": 0.45404085516929626,
      "eval_runtime": 0.4314,
      "eval_samples_per_second": 252.681,
      "eval_steps_per_second": 9.273,
      "step": 441
    },
    {
      "epoch": 57.94,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.44497042894363403,
      "eval_runtime": 0.4614,
      "eval_samples_per_second": 236.26,
      "eval_steps_per_second": 8.67,
      "step": 449
    },
    {
      "epoch": 58.06,
      "learning_rate": 1.0912698412698414e-05,
      "loss": 0.4499,
      "step": 450
    },
    {
      "epoch": 58.97,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.44970786571502686,
      "eval_runtime": 0.633,
      "eval_samples_per_second": 172.2,
      "eval_steps_per_second": 6.319,
      "step": 457
    },
    {
      "epoch": 59.35,
      "learning_rate": 9.92063492063492e-06,
      "loss": 0.4179,
      "step": 460
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.44003376364707947,
      "eval_runtime": 0.4509,
      "eval_samples_per_second": 241.755,
      "eval_steps_per_second": 8.872,
      "step": 465
    },
    {
      "epoch": 60.65,
      "learning_rate": 8.92857142857143e-06,
      "loss": 0.4213,
      "step": 470
    },
    {
      "epoch": 60.9,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.4391891658306122,
      "eval_runtime": 0.4498,
      "eval_samples_per_second": 242.327,
      "eval_steps_per_second": 8.893,
      "step": 472
    },
    {
      "epoch": 61.94,
      "learning_rate": 7.936507936507936e-06,
      "loss": 0.4176,
      "step": 480
    },
    {
      "epoch": 61.94,
      "eval_accuracy": 0.8256880733944955,
      "eval_loss": 0.43245241045951843,
      "eval_runtime": 0.4412,
      "eval_samples_per_second": 247.039,
      "eval_steps_per_second": 9.066,
      "step": 480
    },
    {
      "epoch": 62.97,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.42957109212875366,
      "eval_runtime": 0.4491,
      "eval_samples_per_second": 242.688,
      "eval_steps_per_second": 8.906,
      "step": 488
    },
    {
      "epoch": 63.23,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.4083,
      "step": 490
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.43877631425857544,
      "eval_runtime": 0.4459,
      "eval_samples_per_second": 244.475,
      "eval_steps_per_second": 8.972,
      "step": 496
    },
    {
      "epoch": 64.52,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.3853,
      "step": 500
    },
    {
      "epoch": 64.9,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.43918871879577637,
      "eval_runtime": 0.4466,
      "eval_samples_per_second": 244.06,
      "eval_steps_per_second": 8.956,
      "step": 503
    },
    {
      "epoch": 65.81,
      "learning_rate": 4.96031746031746e-06,
      "loss": 0.3647,
      "step": 510
    },
    {
      "epoch": 65.94,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.4348970353603363,
      "eval_runtime": 0.6039,
      "eval_samples_per_second": 180.506,
      "eval_steps_per_second": 6.624,
      "step": 511
    },
    {
      "epoch": 66.97,
      "eval_accuracy": 0.8256880733944955,
      "eval_loss": 0.4343608319759369,
      "eval_runtime": 0.4394,
      "eval_samples_per_second": 248.071,
      "eval_steps_per_second": 9.104,
      "step": 519
    },
    {
      "epoch": 67.1,
      "learning_rate": 3.968253968253968e-06,
      "loss": 0.3927,
      "step": 520
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.43479177355766296,
      "eval_runtime": 0.4462,
      "eval_samples_per_second": 244.289,
      "eval_steps_per_second": 8.965,
      "step": 527
    },
    {
      "epoch": 68.39,
      "learning_rate": 2.9761904761904763e-06,
      "loss": 0.3833,
      "step": 530
    },
    {
      "epoch": 68.9,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.4351903796195984,
      "eval_runtime": 0.4439,
      "eval_samples_per_second": 245.538,
      "eval_steps_per_second": 9.011,
      "step": 534
    },
    {
      "epoch": 69.68,
      "learning_rate": 1.984126984126984e-06,
      "loss": 0.3932,
      "step": 540
    },
    {
      "epoch": 69.94,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.4294258952140808,
      "eval_runtime": 0.4336,
      "eval_samples_per_second": 251.361,
      "eval_steps_per_second": 9.224,
      "step": 542
    },
    {
      "epoch": 70.97,
      "learning_rate": 9.92063492063492e-07,
      "loss": 0.4085,
      "step": 550
    },
    {
      "epoch": 70.97,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.42758503556251526,
      "eval_runtime": 0.4574,
      "eval_samples_per_second": 238.283,
      "eval_steps_per_second": 8.744,
      "step": 550
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.8073394495412844,
      "eval_loss": 0.4231923222541809,
      "eval_runtime": 0.4385,
      "eval_samples_per_second": 248.573,
      "eval_steps_per_second": 9.122,
      "step": 558
    },
    {
      "epoch": 72.26,
      "learning_rate": 0.0,
      "loss": 0.4029,
      "step": 560
    },
    {
      "epoch": 72.26,
      "eval_accuracy": 0.8165137614678899,
      "eval_loss": 0.435944139957428,
      "eval_runtime": 0.466,
      "eval_samples_per_second": 233.911,
      "eval_steps_per_second": 8.584,
      "step": 560
    },
    {
      "epoch": 72.26,
      "step": 560,
      "total_flos": 1.937282408715387e+17,
      "train_loss": 0.6618133915322167,
      "train_runtime": 514.9079,
      "train_samples_per_second": 151.95,
      "train_steps_per_second": 1.088
    }
  ],
  "logging_steps": 10,
  "max_steps": 560,
  "num_train_epochs": 80,
  "save_steps": 500,
  "total_flos": 1.937282408715387e+17,
  "trial_name": null,
  "trial_params": null
}
|
|