{ "best_metric": 0.8256880733944955, "best_model_checkpoint": "swiftformer-xs-dmae-va-U-80/checkpoint-480", "epoch": 72.25806451612904, "eval_steps": 500, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.28440366972477066, "eval_loss": 1.3862813711166382, "eval_runtime": 0.4815, "eval_samples_per_second": 226.357, "eval_steps_per_second": 8.307, "step": 7 }, { "epoch": 1.29, "learning_rate": 8.92857142857143e-06, "loss": 1.4158, "step": 10 }, { "epoch": 1.94, "eval_accuracy": 0.3119266055045872, "eval_loss": 1.376009225845337, "eval_runtime": 0.4374, "eval_samples_per_second": 249.207, "eval_steps_per_second": 9.145, "step": 15 }, { "epoch": 2.58, "learning_rate": 1.785714285714286e-05, "loss": 1.3853, "step": 20 }, { "epoch": 2.97, "eval_accuracy": 0.3853211009174312, "eval_loss": 1.3548262119293213, "eval_runtime": 0.4385, "eval_samples_per_second": 248.6, "eval_steps_per_second": 9.123, "step": 23 }, { "epoch": 3.87, "learning_rate": 2.6785714285714288e-05, "loss": 1.3745, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.3394495412844037, "eval_loss": 1.3327373266220093, "eval_runtime": 0.4339, "eval_samples_per_second": 251.182, "eval_steps_per_second": 9.218, "step": 31 }, { "epoch": 4.9, "eval_accuracy": 0.42201834862385323, "eval_loss": 1.2938013076782227, "eval_runtime": 0.425, "eval_samples_per_second": 256.451, "eval_steps_per_second": 9.411, "step": 38 }, { "epoch": 5.16, "learning_rate": 3.571428571428572e-05, "loss": 1.3435, "step": 40 }, { "epoch": 5.94, "eval_accuracy": 0.46788990825688076, "eval_loss": 1.244996428489685, "eval_runtime": 0.4464, "eval_samples_per_second": 244.203, "eval_steps_per_second": 8.962, "step": 46 }, { "epoch": 6.45, "learning_rate": 4.464285714285715e-05, "loss": 1.2681, "step": 50 }, { "epoch": 6.97, "eval_accuracy": 0.5596330275229358, "eval_loss": 1.193323016166687, "eval_runtime": 0.4543, "eval_samples_per_second": 239.924, "eval_steps_per_second": 8.805, "step": 54 }, { "epoch": 7.74, "learning_rate": 4.960317460317461e-05, "loss": 1.1803, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.47706422018348627, "eval_loss": 1.1410021781921387, "eval_runtime": 0.5443, "eval_samples_per_second": 200.245, "eval_steps_per_second": 7.348, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.5045871559633027, "eval_loss": 1.1014117002487183, "eval_runtime": 0.4243, "eval_samples_per_second": 256.877, "eval_steps_per_second": 9.427, "step": 69 }, { "epoch": 9.03, "learning_rate": 4.8611111111111115e-05, "loss": 1.1277, "step": 70 }, { "epoch": 9.94, "eval_accuracy": 0.5321100917431193, "eval_loss": 1.0785263776779175, "eval_runtime": 0.5813, "eval_samples_per_second": 187.522, "eval_steps_per_second": 6.882, "step": 77 }, { "epoch": 10.32, "learning_rate": 4.761904761904762e-05, "loss": 1.0674, "step": 80 }, { "epoch": 10.97, "eval_accuracy": 0.5596330275229358, "eval_loss": 1.0439504384994507, "eval_runtime": 0.877, "eval_samples_per_second": 124.292, "eval_steps_per_second": 4.561, "step": 85 }, { "epoch": 11.61, "learning_rate": 4.662698412698413e-05, "loss": 1.0353, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.5779816513761468, "eval_loss": 0.9961591362953186, "eval_runtime": 0.4276, "eval_samples_per_second": 254.912, "eval_steps_per_second": 9.355, "step": 93 }, { "epoch": 12.9, "learning_rate": 4.563492063492064e-05, "loss": 0.9859, "step": 100 }, { "epoch": 12.9, "eval_accuracy": 0.5871559633027523, "eval_loss": 0.9699747562408447, "eval_runtime": 0.488, "eval_samples_per_second": 223.356, "eval_steps_per_second": 8.197, "step": 100 }, { "epoch": 13.94, "eval_accuracy": 0.6422018348623854, "eval_loss": 0.9401906728744507, "eval_runtime": 0.426, "eval_samples_per_second": 255.868, "eval_steps_per_second": 9.39, "step": 108 }, { "epoch": 14.19, "learning_rate": 4.464285714285715e-05, "loss": 0.9397, "step": 110 }, { "epoch": 14.97, "eval_accuracy": 0.6238532110091743, "eval_loss": 0.9215461015701294, "eval_runtime": 0.595, "eval_samples_per_second": 183.202, "eval_steps_per_second": 6.723, "step": 116 }, { "epoch": 15.48, "learning_rate": 4.3650793650793655e-05, "loss": 0.8959, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.6605504587155964, "eval_loss": 0.8744844794273376, "eval_runtime": 0.4502, "eval_samples_per_second": 242.121, "eval_steps_per_second": 8.885, "step": 124 }, { "epoch": 16.77, "learning_rate": 4.265873015873016e-05, "loss": 0.8663, "step": 130 }, { "epoch": 16.9, "eval_accuracy": 0.6697247706422018, "eval_loss": 0.8560922145843506, "eval_runtime": 0.4537, "eval_samples_per_second": 240.261, "eval_steps_per_second": 8.817, "step": 131 }, { "epoch": 17.94, "eval_accuracy": 0.6788990825688074, "eval_loss": 0.8181523084640503, "eval_runtime": 0.4306, "eval_samples_per_second": 253.131, "eval_steps_per_second": 9.289, "step": 139 }, { "epoch": 18.06, "learning_rate": 4.166666666666667e-05, "loss": 0.8405, "step": 140 }, { "epoch": 18.97, "eval_accuracy": 0.6513761467889908, "eval_loss": 0.8167656660079956, "eval_runtime": 0.4512, "eval_samples_per_second": 241.596, "eval_steps_per_second": 8.866, "step": 147 }, { "epoch": 19.35, "learning_rate": 4.067460317460318e-05, "loss": 0.8093, "step": 150 }, { "epoch": 20.0, "eval_accuracy": 0.6788990825688074, "eval_loss": 0.8038831949234009, "eval_runtime": 0.434, "eval_samples_per_second": 251.165, "eval_steps_per_second": 9.217, "step": 155 }, { "epoch": 20.65, "learning_rate": 3.968253968253968e-05, "loss": 0.7396, "step": 160 }, { "epoch": 20.9, "eval_accuracy": 0.7064220183486238, "eval_loss": 0.7477715015411377, "eval_runtime": 0.44, "eval_samples_per_second": 247.725, "eval_steps_per_second": 9.091, "step": 162 }, { "epoch": 21.94, "learning_rate": 3.8690476190476195e-05, "loss": 0.7588, "step": 170 }, { "epoch": 21.94, "eval_accuracy": 0.6972477064220184, "eval_loss": 0.7236711978912354, "eval_runtime": 0.6128, "eval_samples_per_second": 177.862, "eval_steps_per_second": 6.527, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.7155963302752294, "eval_loss": 0.7030771374702454, "eval_runtime": 0.4488, "eval_samples_per_second": 242.893, "eval_steps_per_second": 8.913, "step": 178 }, { "epoch": 23.23, "learning_rate": 3.76984126984127e-05, "loss": 0.7189, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.6972477064220184, "eval_loss": 0.6955855488777161, "eval_runtime": 0.4384, "eval_samples_per_second": 248.655, "eval_steps_per_second": 9.125, "step": 186 }, { "epoch": 24.52, "learning_rate": 3.6706349206349205e-05, "loss": 0.7111, "step": 190 }, { "epoch": 24.9, "eval_accuracy": 0.7247706422018348, "eval_loss": 0.6749173998832703, "eval_runtime": 0.4278, "eval_samples_per_second": 254.789, "eval_steps_per_second": 9.35, "step": 193 }, { "epoch": 25.81, "learning_rate": 3.571428571428572e-05, "loss": 0.6577, "step": 200 }, { "epoch": 25.94, "eval_accuracy": 0.6972477064220184, "eval_loss": 0.6757855415344238, "eval_runtime": 0.433, "eval_samples_per_second": 251.741, "eval_steps_per_second": 9.238, "step": 201 }, { "epoch": 26.97, "eval_accuracy": 0.7339449541284404, "eval_loss": 0.6429359316825867, "eval_runtime": 0.6106, "eval_samples_per_second": 178.522, "eval_steps_per_second": 6.551, "step": 209 }, { "epoch": 27.1, "learning_rate": 3.472222222222222e-05, "loss": 0.6681, "step": 210 }, { "epoch": 28.0, "eval_accuracy": 0.7064220183486238, "eval_loss": 0.6450981497764587, "eval_runtime": 0.4394, "eval_samples_per_second": 248.065, "eval_steps_per_second": 9.103, "step": 217 }, { "epoch": 28.39, "learning_rate": 3.3730158730158734e-05, "loss": 0.6238, "step": 220 }, { "epoch": 28.9, "eval_accuracy": 0.7339449541284404, "eval_loss": 0.6367726922035217, "eval_runtime": 0.61, "eval_samples_per_second": 178.699, "eval_steps_per_second": 6.558, "step": 224 }, { "epoch": 29.68, "learning_rate": 3.273809523809524e-05, "loss": 0.6136, "step": 230 }, { "epoch": 29.94, "eval_accuracy": 0.7706422018348624, "eval_loss": 0.6232722997665405, "eval_runtime": 0.4356, "eval_samples_per_second": 250.256, "eval_steps_per_second": 9.184, "step": 232 }, { "epoch": 30.97, "learning_rate": 3.1746031746031745e-05, "loss": 0.5934, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.7706422018348624, "eval_loss": 0.6160764098167419, "eval_runtime": 0.4496, "eval_samples_per_second": 242.42, "eval_steps_per_second": 8.896, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.7431192660550459, "eval_loss": 0.626839816570282, "eval_runtime": 0.44, "eval_samples_per_second": 247.748, "eval_steps_per_second": 9.092, "step": 248 }, { "epoch": 32.26, "learning_rate": 3.075396825396826e-05, "loss": 0.5807, "step": 250 }, { "epoch": 32.9, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.5879014730453491, "eval_runtime": 0.449, "eval_samples_per_second": 242.778, "eval_steps_per_second": 8.909, "step": 255 }, { "epoch": 33.55, "learning_rate": 2.9761904761904762e-05, "loss": 0.575, "step": 260 }, { "epoch": 33.94, "eval_accuracy": 0.7706422018348624, "eval_loss": 0.5772398710250854, "eval_runtime": 0.4322, "eval_samples_per_second": 252.185, "eval_steps_per_second": 9.254, "step": 263 }, { "epoch": 34.84, "learning_rate": 2.876984126984127e-05, "loss": 0.5409, "step": 270 }, { "epoch": 34.97, "eval_accuracy": 0.7798165137614679, "eval_loss": 0.5702607035636902, "eval_runtime": 0.4332, "eval_samples_per_second": 251.604, "eval_steps_per_second": 9.233, "step": 271 }, { "epoch": 36.0, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.5603441596031189, "eval_runtime": 0.6241, "eval_samples_per_second": 174.639, "eval_steps_per_second": 6.409, "step": 279 }, { "epoch": 36.13, "learning_rate": 2.777777777777778e-05, "loss": 0.553, "step": 280 }, { "epoch": 36.9, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.5560410022735596, "eval_runtime": 0.4375, "eval_samples_per_second": 249.161, "eval_steps_per_second": 9.144, "step": 286 }, { "epoch": 37.42, "learning_rate": 2.6785714285714288e-05, "loss": 0.515, "step": 290 }, { "epoch": 37.94, "eval_accuracy": 0.7706422018348624, "eval_loss": 0.5639447569847107, "eval_runtime": 0.4367, "eval_samples_per_second": 249.625, "eval_steps_per_second": 9.161, "step": 294 }, { "epoch": 38.71, "learning_rate": 2.5793650793650796e-05, "loss": 0.5424, "step": 300 }, { "epoch": 38.97, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.5483418107032776, "eval_runtime": 0.4272, "eval_samples_per_second": 255.167, "eval_steps_per_second": 9.364, "step": 302 }, { "epoch": 40.0, "learning_rate": 2.4801587301587305e-05, "loss": 0.5193, "step": 310 }, { "epoch": 40.0, "eval_accuracy": 0.7798165137614679, "eval_loss": 0.5505456924438477, "eval_runtime": 0.4357, "eval_samples_per_second": 250.151, "eval_steps_per_second": 9.18, "step": 310 }, { "epoch": 40.9, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.5322768092155457, "eval_runtime": 0.438, "eval_samples_per_second": 248.859, "eval_steps_per_second": 9.132, "step": 317 }, { "epoch": 41.29, "learning_rate": 2.380952380952381e-05, "loss": 0.5123, "step": 320 }, { "epoch": 41.94, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.5257070064544678, "eval_runtime": 0.4489, "eval_samples_per_second": 242.816, "eval_steps_per_second": 8.911, "step": 325 }, { "epoch": 42.58, "learning_rate": 2.281746031746032e-05, "loss": 0.4719, "step": 330 }, { "epoch": 42.97, "eval_accuracy": 0.7798165137614679, "eval_loss": 0.5269966125488281, "eval_runtime": 0.7214, "eval_samples_per_second": 151.1, "eval_steps_per_second": 5.545, "step": 333 }, { "epoch": 43.87, "learning_rate": 2.1825396825396827e-05, "loss": 0.4583, "step": 340 }, { "epoch": 44.0, "eval_accuracy": 0.7706422018348624, "eval_loss": 0.5304660201072693, "eval_runtime": 0.4302, "eval_samples_per_second": 253.364, "eval_steps_per_second": 9.298, "step": 341 }, { "epoch": 44.9, "eval_accuracy": 0.7798165137614679, "eval_loss": 0.5281646251678467, "eval_runtime": 0.6252, "eval_samples_per_second": 174.336, "eval_steps_per_second": 6.398, "step": 348 }, { "epoch": 45.16, "learning_rate": 2.0833333333333336e-05, "loss": 0.4568, "step": 350 }, { "epoch": 45.94, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.5177940130233765, "eval_runtime": 0.4569, "eval_samples_per_second": 238.568, "eval_steps_per_second": 8.755, "step": 356 }, { "epoch": 46.45, "learning_rate": 1.984126984126984e-05, "loss": 0.4717, "step": 360 }, { "epoch": 46.97, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.49453452229499817, "eval_runtime": 0.4248, "eval_samples_per_second": 256.572, "eval_steps_per_second": 9.415, "step": 364 }, { "epoch": 47.74, "learning_rate": 1.884920634920635e-05, "loss": 0.4587, "step": 370 }, { "epoch": 48.0, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.4978266656398773, "eval_runtime": 0.4615, "eval_samples_per_second": 236.178, "eval_steps_per_second": 8.667, "step": 372 }, { "epoch": 48.9, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.4887874722480774, "eval_runtime": 0.4558, "eval_samples_per_second": 239.119, "eval_steps_per_second": 8.775, "step": 379 }, { "epoch": 49.03, "learning_rate": 1.785714285714286e-05, "loss": 0.4314, "step": 380 }, { "epoch": 49.94, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.48671984672546387, "eval_runtime": 0.4423, "eval_samples_per_second": 246.43, "eval_steps_per_second": 9.043, "step": 387 }, { "epoch": 50.32, "learning_rate": 1.6865079365079367e-05, "loss": 0.4389, "step": 390 }, { "epoch": 50.97, "eval_accuracy": 0.7889908256880734, "eval_loss": 0.4739398658275604, "eval_runtime": 0.4484, "eval_samples_per_second": 243.061, "eval_steps_per_second": 8.92, "step": 395 }, { "epoch": 51.61, "learning_rate": 1.5873015873015872e-05, "loss": 0.4115, "step": 400 }, { "epoch": 52.0, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.4844360053539276, "eval_runtime": 0.6269, "eval_samples_per_second": 173.869, "eval_steps_per_second": 6.381, "step": 403 }, { "epoch": 52.9, "learning_rate": 1.4880952380952381e-05, "loss": 0.4323, "step": 410 }, { "epoch": 52.9, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.4818795323371887, "eval_runtime": 0.4584, "eval_samples_per_second": 237.796, "eval_steps_per_second": 8.726, "step": 410 }, { "epoch": 53.94, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.45617520809173584, "eval_runtime": 0.4517, "eval_samples_per_second": 241.332, "eval_steps_per_second": 8.856, "step": 418 }, { "epoch": 54.19, "learning_rate": 1.388888888888889e-05, "loss": 0.3855, "step": 420 }, { "epoch": 54.97, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.4639626145362854, "eval_runtime": 0.4274, "eval_samples_per_second": 255.051, "eval_steps_per_second": 9.36, "step": 426 }, { "epoch": 55.48, "learning_rate": 1.2896825396825398e-05, "loss": 0.4113, "step": 430 }, { "epoch": 56.0, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.44737786054611206, "eval_runtime": 0.4453, "eval_samples_per_second": 244.764, "eval_steps_per_second": 8.982, "step": 434 }, { "epoch": 56.77, "learning_rate": 1.1904761904761905e-05, "loss": 0.4282, "step": 440 }, { "epoch": 56.9, "eval_accuracy": 0.7981651376146789, "eval_loss": 0.45404085516929626, "eval_runtime": 0.4314, "eval_samples_per_second": 252.681, "eval_steps_per_second": 9.273, "step": 441 }, { "epoch": 57.94, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.44497042894363403, "eval_runtime": 0.4614, "eval_samples_per_second": 236.26, "eval_steps_per_second": 8.67, "step": 449 }, { "epoch": 58.06, "learning_rate": 1.0912698412698414e-05, "loss": 0.4499, "step": 450 }, { "epoch": 58.97, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.44970786571502686, "eval_runtime": 0.633, "eval_samples_per_second": 172.2, "eval_steps_per_second": 6.319, "step": 457 }, { "epoch": 59.35, "learning_rate": 9.92063492063492e-06, "loss": 0.4179, "step": 460 }, { "epoch": 60.0, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.44003376364707947, "eval_runtime": 0.4509, "eval_samples_per_second": 241.755, "eval_steps_per_second": 8.872, "step": 465 }, { "epoch": 60.65, "learning_rate": 8.92857142857143e-06, "loss": 0.4213, "step": 470 }, { "epoch": 60.9, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.4391891658306122, "eval_runtime": 0.4498, "eval_samples_per_second": 242.327, "eval_steps_per_second": 8.893, "step": 472 }, { "epoch": 61.94, "learning_rate": 7.936507936507936e-06, "loss": 0.4176, "step": 480 }, { "epoch": 61.94, "eval_accuracy": 0.8256880733944955, "eval_loss": 0.43245241045951843, "eval_runtime": 0.4412, "eval_samples_per_second": 247.039, "eval_steps_per_second": 9.066, "step": 480 }, { "epoch": 62.97, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.42957109212875366, "eval_runtime": 0.4491, "eval_samples_per_second": 242.688, "eval_steps_per_second": 8.906, "step": 488 }, { "epoch": 63.23, "learning_rate": 6.944444444444445e-06, "loss": 0.4083, "step": 490 }, { "epoch": 64.0, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.43877631425857544, "eval_runtime": 0.4459, "eval_samples_per_second": 244.475, "eval_steps_per_second": 8.972, "step": 496 }, { "epoch": 64.52, "learning_rate": 5.9523809523809525e-06, "loss": 0.3853, "step": 500 }, { "epoch": 64.9, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.43918871879577637, "eval_runtime": 0.4466, "eval_samples_per_second": 244.06, "eval_steps_per_second": 8.956, "step": 503 }, { "epoch": 65.81, "learning_rate": 4.96031746031746e-06, "loss": 0.3647, "step": 510 }, { "epoch": 65.94, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.4348970353603363, "eval_runtime": 0.6039, "eval_samples_per_second": 180.506, "eval_steps_per_second": 6.624, "step": 511 }, { "epoch": 66.97, "eval_accuracy": 0.8256880733944955, "eval_loss": 0.4343608319759369, "eval_runtime": 0.4394, "eval_samples_per_second": 248.071, "eval_steps_per_second": 9.104, "step": 519 }, { "epoch": 67.1, "learning_rate": 3.968253968253968e-06, "loss": 0.3927, "step": 520 }, { "epoch": 68.0, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.43479177355766296, "eval_runtime": 0.4462, "eval_samples_per_second": 244.289, "eval_steps_per_second": 8.965, "step": 527 }, { "epoch": 68.39, "learning_rate": 2.9761904761904763e-06, "loss": 0.3833, "step": 530 }, { "epoch": 68.9, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.4351903796195984, "eval_runtime": 0.4439, "eval_samples_per_second": 245.538, "eval_steps_per_second": 9.011, "step": 534 }, { "epoch": 69.68, "learning_rate": 1.984126984126984e-06, "loss": 0.3932, "step": 540 }, { "epoch": 69.94, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.4294258952140808, "eval_runtime": 0.4336, "eval_samples_per_second": 251.361, "eval_steps_per_second": 9.224, "step": 542 }, { "epoch": 70.97, "learning_rate": 9.92063492063492e-07, "loss": 0.4085, "step": 550 }, { "epoch": 70.97, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.42758503556251526, "eval_runtime": 0.4574, "eval_samples_per_second": 238.283, "eval_steps_per_second": 8.744, "step": 550 }, { "epoch": 72.0, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.4231923222541809, "eval_runtime": 0.4385, "eval_samples_per_second": 248.573, "eval_steps_per_second": 9.122, "step": 558 }, { "epoch": 72.26, "learning_rate": 0.0, "loss": 0.4029, "step": 560 }, { "epoch": 72.26, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.435944139957428, "eval_runtime": 0.466, "eval_samples_per_second": 233.911, "eval_steps_per_second": 8.584, "step": 560 }, { "epoch": 72.26, "step": 560, "total_flos": 1.937282408715387e+17, "train_loss": 0.6618133915322167, "train_runtime": 514.9079, "train_samples_per_second": 151.95, "train_steps_per_second": 1.088 } ], "logging_steps": 10, "max_steps": 560, "num_train_epochs": 80, "save_steps": 500, "total_flos": 1.937282408715387e+17, "trial_name": null, "trial_params": null }