swiftformer-xs-dmae-va-U-80 / trainer_state.json
Augusto777's picture
End of training
eeb99df verified
{
"best_metric": 0.8256880733944955,
"best_model_checkpoint": "swiftformer-xs-dmae-va-U-80/checkpoint-480",
"epoch": 72.25806451612904,
"eval_steps": 500,
"global_step": 560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9,
"eval_accuracy": 0.28440366972477066,
"eval_loss": 1.3862813711166382,
"eval_runtime": 0.4815,
"eval_samples_per_second": 226.357,
"eval_steps_per_second": 8.307,
"step": 7
},
{
"epoch": 1.29,
"learning_rate": 8.92857142857143e-06,
"loss": 1.4158,
"step": 10
},
{
"epoch": 1.94,
"eval_accuracy": 0.3119266055045872,
"eval_loss": 1.376009225845337,
"eval_runtime": 0.4374,
"eval_samples_per_second": 249.207,
"eval_steps_per_second": 9.145,
"step": 15
},
{
"epoch": 2.58,
"learning_rate": 1.785714285714286e-05,
"loss": 1.3853,
"step": 20
},
{
"epoch": 2.97,
"eval_accuracy": 0.3853211009174312,
"eval_loss": 1.3548262119293213,
"eval_runtime": 0.4385,
"eval_samples_per_second": 248.6,
"eval_steps_per_second": 9.123,
"step": 23
},
{
"epoch": 3.87,
"learning_rate": 2.6785714285714288e-05,
"loss": 1.3745,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": 0.3394495412844037,
"eval_loss": 1.3327373266220093,
"eval_runtime": 0.4339,
"eval_samples_per_second": 251.182,
"eval_steps_per_second": 9.218,
"step": 31
},
{
"epoch": 4.9,
"eval_accuracy": 0.42201834862385323,
"eval_loss": 1.2938013076782227,
"eval_runtime": 0.425,
"eval_samples_per_second": 256.451,
"eval_steps_per_second": 9.411,
"step": 38
},
{
"epoch": 5.16,
"learning_rate": 3.571428571428572e-05,
"loss": 1.3435,
"step": 40
},
{
"epoch": 5.94,
"eval_accuracy": 0.46788990825688076,
"eval_loss": 1.244996428489685,
"eval_runtime": 0.4464,
"eval_samples_per_second": 244.203,
"eval_steps_per_second": 8.962,
"step": 46
},
{
"epoch": 6.45,
"learning_rate": 4.464285714285715e-05,
"loss": 1.2681,
"step": 50
},
{
"epoch": 6.97,
"eval_accuracy": 0.5596330275229358,
"eval_loss": 1.193323016166687,
"eval_runtime": 0.4543,
"eval_samples_per_second": 239.924,
"eval_steps_per_second": 8.805,
"step": 54
},
{
"epoch": 7.74,
"learning_rate": 4.960317460317461e-05,
"loss": 1.1803,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.47706422018348627,
"eval_loss": 1.1410021781921387,
"eval_runtime": 0.5443,
"eval_samples_per_second": 200.245,
"eval_steps_per_second": 7.348,
"step": 62
},
{
"epoch": 8.9,
"eval_accuracy": 0.5045871559633027,
"eval_loss": 1.1014117002487183,
"eval_runtime": 0.4243,
"eval_samples_per_second": 256.877,
"eval_steps_per_second": 9.427,
"step": 69
},
{
"epoch": 9.03,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.1277,
"step": 70
},
{
"epoch": 9.94,
"eval_accuracy": 0.5321100917431193,
"eval_loss": 1.0785263776779175,
"eval_runtime": 0.5813,
"eval_samples_per_second": 187.522,
"eval_steps_per_second": 6.882,
"step": 77
},
{
"epoch": 10.32,
"learning_rate": 4.761904761904762e-05,
"loss": 1.0674,
"step": 80
},
{
"epoch": 10.97,
"eval_accuracy": 0.5596330275229358,
"eval_loss": 1.0439504384994507,
"eval_runtime": 0.877,
"eval_samples_per_second": 124.292,
"eval_steps_per_second": 4.561,
"step": 85
},
{
"epoch": 11.61,
"learning_rate": 4.662698412698413e-05,
"loss": 1.0353,
"step": 90
},
{
"epoch": 12.0,
"eval_accuracy": 0.5779816513761468,
"eval_loss": 0.9961591362953186,
"eval_runtime": 0.4276,
"eval_samples_per_second": 254.912,
"eval_steps_per_second": 9.355,
"step": 93
},
{
"epoch": 12.9,
"learning_rate": 4.563492063492064e-05,
"loss": 0.9859,
"step": 100
},
{
"epoch": 12.9,
"eval_accuracy": 0.5871559633027523,
"eval_loss": 0.9699747562408447,
"eval_runtime": 0.488,
"eval_samples_per_second": 223.356,
"eval_steps_per_second": 8.197,
"step": 100
},
{
"epoch": 13.94,
"eval_accuracy": 0.6422018348623854,
"eval_loss": 0.9401906728744507,
"eval_runtime": 0.426,
"eval_samples_per_second": 255.868,
"eval_steps_per_second": 9.39,
"step": 108
},
{
"epoch": 14.19,
"learning_rate": 4.464285714285715e-05,
"loss": 0.9397,
"step": 110
},
{
"epoch": 14.97,
"eval_accuracy": 0.6238532110091743,
"eval_loss": 0.9215461015701294,
"eval_runtime": 0.595,
"eval_samples_per_second": 183.202,
"eval_steps_per_second": 6.723,
"step": 116
},
{
"epoch": 15.48,
"learning_rate": 4.3650793650793655e-05,
"loss": 0.8959,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.6605504587155964,
"eval_loss": 0.8744844794273376,
"eval_runtime": 0.4502,
"eval_samples_per_second": 242.121,
"eval_steps_per_second": 8.885,
"step": 124
},
{
"epoch": 16.77,
"learning_rate": 4.265873015873016e-05,
"loss": 0.8663,
"step": 130
},
{
"epoch": 16.9,
"eval_accuracy": 0.6697247706422018,
"eval_loss": 0.8560922145843506,
"eval_runtime": 0.4537,
"eval_samples_per_second": 240.261,
"eval_steps_per_second": 8.817,
"step": 131
},
{
"epoch": 17.94,
"eval_accuracy": 0.6788990825688074,
"eval_loss": 0.8181523084640503,
"eval_runtime": 0.4306,
"eval_samples_per_second": 253.131,
"eval_steps_per_second": 9.289,
"step": 139
},
{
"epoch": 18.06,
"learning_rate": 4.166666666666667e-05,
"loss": 0.8405,
"step": 140
},
{
"epoch": 18.97,
"eval_accuracy": 0.6513761467889908,
"eval_loss": 0.8167656660079956,
"eval_runtime": 0.4512,
"eval_samples_per_second": 241.596,
"eval_steps_per_second": 8.866,
"step": 147
},
{
"epoch": 19.35,
"learning_rate": 4.067460317460318e-05,
"loss": 0.8093,
"step": 150
},
{
"epoch": 20.0,
"eval_accuracy": 0.6788990825688074,
"eval_loss": 0.8038831949234009,
"eval_runtime": 0.434,
"eval_samples_per_second": 251.165,
"eval_steps_per_second": 9.217,
"step": 155
},
{
"epoch": 20.65,
"learning_rate": 3.968253968253968e-05,
"loss": 0.7396,
"step": 160
},
{
"epoch": 20.9,
"eval_accuracy": 0.7064220183486238,
"eval_loss": 0.7477715015411377,
"eval_runtime": 0.44,
"eval_samples_per_second": 247.725,
"eval_steps_per_second": 9.091,
"step": 162
},
{
"epoch": 21.94,
"learning_rate": 3.8690476190476195e-05,
"loss": 0.7588,
"step": 170
},
{
"epoch": 21.94,
"eval_accuracy": 0.6972477064220184,
"eval_loss": 0.7236711978912354,
"eval_runtime": 0.6128,
"eval_samples_per_second": 177.862,
"eval_steps_per_second": 6.527,
"step": 170
},
{
"epoch": 22.97,
"eval_accuracy": 0.7155963302752294,
"eval_loss": 0.7030771374702454,
"eval_runtime": 0.4488,
"eval_samples_per_second": 242.893,
"eval_steps_per_second": 8.913,
"step": 178
},
{
"epoch": 23.23,
"learning_rate": 3.76984126984127e-05,
"loss": 0.7189,
"step": 180
},
{
"epoch": 24.0,
"eval_accuracy": 0.6972477064220184,
"eval_loss": 0.6955855488777161,
"eval_runtime": 0.4384,
"eval_samples_per_second": 248.655,
"eval_steps_per_second": 9.125,
"step": 186
},
{
"epoch": 24.52,
"learning_rate": 3.6706349206349205e-05,
"loss": 0.7111,
"step": 190
},
{
"epoch": 24.9,
"eval_accuracy": 0.7247706422018348,
"eval_loss": 0.6749173998832703,
"eval_runtime": 0.4278,
"eval_samples_per_second": 254.789,
"eval_steps_per_second": 9.35,
"step": 193
},
{
"epoch": 25.81,
"learning_rate": 3.571428571428572e-05,
"loss": 0.6577,
"step": 200
},
{
"epoch": 25.94,
"eval_accuracy": 0.6972477064220184,
"eval_loss": 0.6757855415344238,
"eval_runtime": 0.433,
"eval_samples_per_second": 251.741,
"eval_steps_per_second": 9.238,
"step": 201
},
{
"epoch": 26.97,
"eval_accuracy": 0.7339449541284404,
"eval_loss": 0.6429359316825867,
"eval_runtime": 0.6106,
"eval_samples_per_second": 178.522,
"eval_steps_per_second": 6.551,
"step": 209
},
{
"epoch": 27.1,
"learning_rate": 3.472222222222222e-05,
"loss": 0.6681,
"step": 210
},
{
"epoch": 28.0,
"eval_accuracy": 0.7064220183486238,
"eval_loss": 0.6450981497764587,
"eval_runtime": 0.4394,
"eval_samples_per_second": 248.065,
"eval_steps_per_second": 9.103,
"step": 217
},
{
"epoch": 28.39,
"learning_rate": 3.3730158730158734e-05,
"loss": 0.6238,
"step": 220
},
{
"epoch": 28.9,
"eval_accuracy": 0.7339449541284404,
"eval_loss": 0.6367726922035217,
"eval_runtime": 0.61,
"eval_samples_per_second": 178.699,
"eval_steps_per_second": 6.558,
"step": 224
},
{
"epoch": 29.68,
"learning_rate": 3.273809523809524e-05,
"loss": 0.6136,
"step": 230
},
{
"epoch": 29.94,
"eval_accuracy": 0.7706422018348624,
"eval_loss": 0.6232722997665405,
"eval_runtime": 0.4356,
"eval_samples_per_second": 250.256,
"eval_steps_per_second": 9.184,
"step": 232
},
{
"epoch": 30.97,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.5934,
"step": 240
},
{
"epoch": 30.97,
"eval_accuracy": 0.7706422018348624,
"eval_loss": 0.6160764098167419,
"eval_runtime": 0.4496,
"eval_samples_per_second": 242.42,
"eval_steps_per_second": 8.896,
"step": 240
},
{
"epoch": 32.0,
"eval_accuracy": 0.7431192660550459,
"eval_loss": 0.626839816570282,
"eval_runtime": 0.44,
"eval_samples_per_second": 247.748,
"eval_steps_per_second": 9.092,
"step": 248
},
{
"epoch": 32.26,
"learning_rate": 3.075396825396826e-05,
"loss": 0.5807,
"step": 250
},
{
"epoch": 32.9,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.5879014730453491,
"eval_runtime": 0.449,
"eval_samples_per_second": 242.778,
"eval_steps_per_second": 8.909,
"step": 255
},
{
"epoch": 33.55,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.575,
"step": 260
},
{
"epoch": 33.94,
"eval_accuracy": 0.7706422018348624,
"eval_loss": 0.5772398710250854,
"eval_runtime": 0.4322,
"eval_samples_per_second": 252.185,
"eval_steps_per_second": 9.254,
"step": 263
},
{
"epoch": 34.84,
"learning_rate": 2.876984126984127e-05,
"loss": 0.5409,
"step": 270
},
{
"epoch": 34.97,
"eval_accuracy": 0.7798165137614679,
"eval_loss": 0.5702607035636902,
"eval_runtime": 0.4332,
"eval_samples_per_second": 251.604,
"eval_steps_per_second": 9.233,
"step": 271
},
{
"epoch": 36.0,
"eval_accuracy": 0.7889908256880734,
"eval_loss": 0.5603441596031189,
"eval_runtime": 0.6241,
"eval_samples_per_second": 174.639,
"eval_steps_per_second": 6.409,
"step": 279
},
{
"epoch": 36.13,
"learning_rate": 2.777777777777778e-05,
"loss": 0.553,
"step": 280
},
{
"epoch": 36.9,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.5560410022735596,
"eval_runtime": 0.4375,
"eval_samples_per_second": 249.161,
"eval_steps_per_second": 9.144,
"step": 286
},
{
"epoch": 37.42,
"learning_rate": 2.6785714285714288e-05,
"loss": 0.515,
"step": 290
},
{
"epoch": 37.94,
"eval_accuracy": 0.7706422018348624,
"eval_loss": 0.5639447569847107,
"eval_runtime": 0.4367,
"eval_samples_per_second": 249.625,
"eval_steps_per_second": 9.161,
"step": 294
},
{
"epoch": 38.71,
"learning_rate": 2.5793650793650796e-05,
"loss": 0.5424,
"step": 300
},
{
"epoch": 38.97,
"eval_accuracy": 0.7889908256880734,
"eval_loss": 0.5483418107032776,
"eval_runtime": 0.4272,
"eval_samples_per_second": 255.167,
"eval_steps_per_second": 9.364,
"step": 302
},
{
"epoch": 40.0,
"learning_rate": 2.4801587301587305e-05,
"loss": 0.5193,
"step": 310
},
{
"epoch": 40.0,
"eval_accuracy": 0.7798165137614679,
"eval_loss": 0.5505456924438477,
"eval_runtime": 0.4357,
"eval_samples_per_second": 250.151,
"eval_steps_per_second": 9.18,
"step": 310
},
{
"epoch": 40.9,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.5322768092155457,
"eval_runtime": 0.438,
"eval_samples_per_second": 248.859,
"eval_steps_per_second": 9.132,
"step": 317
},
{
"epoch": 41.29,
"learning_rate": 2.380952380952381e-05,
"loss": 0.5123,
"step": 320
},
{
"epoch": 41.94,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.5257070064544678,
"eval_runtime": 0.4489,
"eval_samples_per_second": 242.816,
"eval_steps_per_second": 8.911,
"step": 325
},
{
"epoch": 42.58,
"learning_rate": 2.281746031746032e-05,
"loss": 0.4719,
"step": 330
},
{
"epoch": 42.97,
"eval_accuracy": 0.7798165137614679,
"eval_loss": 0.5269966125488281,
"eval_runtime": 0.7214,
"eval_samples_per_second": 151.1,
"eval_steps_per_second": 5.545,
"step": 333
},
{
"epoch": 43.87,
"learning_rate": 2.1825396825396827e-05,
"loss": 0.4583,
"step": 340
},
{
"epoch": 44.0,
"eval_accuracy": 0.7706422018348624,
"eval_loss": 0.5304660201072693,
"eval_runtime": 0.4302,
"eval_samples_per_second": 253.364,
"eval_steps_per_second": 9.298,
"step": 341
},
{
"epoch": 44.9,
"eval_accuracy": 0.7798165137614679,
"eval_loss": 0.5281646251678467,
"eval_runtime": 0.6252,
"eval_samples_per_second": 174.336,
"eval_steps_per_second": 6.398,
"step": 348
},
{
"epoch": 45.16,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.4568,
"step": 350
},
{
"epoch": 45.94,
"eval_accuracy": 0.7889908256880734,
"eval_loss": 0.5177940130233765,
"eval_runtime": 0.4569,
"eval_samples_per_second": 238.568,
"eval_steps_per_second": 8.755,
"step": 356
},
{
"epoch": 46.45,
"learning_rate": 1.984126984126984e-05,
"loss": 0.4717,
"step": 360
},
{
"epoch": 46.97,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.49453452229499817,
"eval_runtime": 0.4248,
"eval_samples_per_second": 256.572,
"eval_steps_per_second": 9.415,
"step": 364
},
{
"epoch": 47.74,
"learning_rate": 1.884920634920635e-05,
"loss": 0.4587,
"step": 370
},
{
"epoch": 48.0,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.4978266656398773,
"eval_runtime": 0.4615,
"eval_samples_per_second": 236.178,
"eval_steps_per_second": 8.667,
"step": 372
},
{
"epoch": 48.9,
"eval_accuracy": 0.7889908256880734,
"eval_loss": 0.4887874722480774,
"eval_runtime": 0.4558,
"eval_samples_per_second": 239.119,
"eval_steps_per_second": 8.775,
"step": 379
},
{
"epoch": 49.03,
"learning_rate": 1.785714285714286e-05,
"loss": 0.4314,
"step": 380
},
{
"epoch": 49.94,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.48671984672546387,
"eval_runtime": 0.4423,
"eval_samples_per_second": 246.43,
"eval_steps_per_second": 9.043,
"step": 387
},
{
"epoch": 50.32,
"learning_rate": 1.6865079365079367e-05,
"loss": 0.4389,
"step": 390
},
{
"epoch": 50.97,
"eval_accuracy": 0.7889908256880734,
"eval_loss": 0.4739398658275604,
"eval_runtime": 0.4484,
"eval_samples_per_second": 243.061,
"eval_steps_per_second": 8.92,
"step": 395
},
{
"epoch": 51.61,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.4115,
"step": 400
},
{
"epoch": 52.0,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.4844360053539276,
"eval_runtime": 0.6269,
"eval_samples_per_second": 173.869,
"eval_steps_per_second": 6.381,
"step": 403
},
{
"epoch": 52.9,
"learning_rate": 1.4880952380952381e-05,
"loss": 0.4323,
"step": 410
},
{
"epoch": 52.9,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.4818795323371887,
"eval_runtime": 0.4584,
"eval_samples_per_second": 237.796,
"eval_steps_per_second": 8.726,
"step": 410
},
{
"epoch": 53.94,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.45617520809173584,
"eval_runtime": 0.4517,
"eval_samples_per_second": 241.332,
"eval_steps_per_second": 8.856,
"step": 418
},
{
"epoch": 54.19,
"learning_rate": 1.388888888888889e-05,
"loss": 0.3855,
"step": 420
},
{
"epoch": 54.97,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.4639626145362854,
"eval_runtime": 0.4274,
"eval_samples_per_second": 255.051,
"eval_steps_per_second": 9.36,
"step": 426
},
{
"epoch": 55.48,
"learning_rate": 1.2896825396825398e-05,
"loss": 0.4113,
"step": 430
},
{
"epoch": 56.0,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.44737786054611206,
"eval_runtime": 0.4453,
"eval_samples_per_second": 244.764,
"eval_steps_per_second": 8.982,
"step": 434
},
{
"epoch": 56.77,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.4282,
"step": 440
},
{
"epoch": 56.9,
"eval_accuracy": 0.7981651376146789,
"eval_loss": 0.45404085516929626,
"eval_runtime": 0.4314,
"eval_samples_per_second": 252.681,
"eval_steps_per_second": 9.273,
"step": 441
},
{
"epoch": 57.94,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.44497042894363403,
"eval_runtime": 0.4614,
"eval_samples_per_second": 236.26,
"eval_steps_per_second": 8.67,
"step": 449
},
{
"epoch": 58.06,
"learning_rate": 1.0912698412698414e-05,
"loss": 0.4499,
"step": 450
},
{
"epoch": 58.97,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.44970786571502686,
"eval_runtime": 0.633,
"eval_samples_per_second": 172.2,
"eval_steps_per_second": 6.319,
"step": 457
},
{
"epoch": 59.35,
"learning_rate": 9.92063492063492e-06,
"loss": 0.4179,
"step": 460
},
{
"epoch": 60.0,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.44003376364707947,
"eval_runtime": 0.4509,
"eval_samples_per_second": 241.755,
"eval_steps_per_second": 8.872,
"step": 465
},
{
"epoch": 60.65,
"learning_rate": 8.92857142857143e-06,
"loss": 0.4213,
"step": 470
},
{
"epoch": 60.9,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.4391891658306122,
"eval_runtime": 0.4498,
"eval_samples_per_second": 242.327,
"eval_steps_per_second": 8.893,
"step": 472
},
{
"epoch": 61.94,
"learning_rate": 7.936507936507936e-06,
"loss": 0.4176,
"step": 480
},
{
"epoch": 61.94,
"eval_accuracy": 0.8256880733944955,
"eval_loss": 0.43245241045951843,
"eval_runtime": 0.4412,
"eval_samples_per_second": 247.039,
"eval_steps_per_second": 9.066,
"step": 480
},
{
"epoch": 62.97,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.42957109212875366,
"eval_runtime": 0.4491,
"eval_samples_per_second": 242.688,
"eval_steps_per_second": 8.906,
"step": 488
},
{
"epoch": 63.23,
"learning_rate": 6.944444444444445e-06,
"loss": 0.4083,
"step": 490
},
{
"epoch": 64.0,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.43877631425857544,
"eval_runtime": 0.4459,
"eval_samples_per_second": 244.475,
"eval_steps_per_second": 8.972,
"step": 496
},
{
"epoch": 64.52,
"learning_rate": 5.9523809523809525e-06,
"loss": 0.3853,
"step": 500
},
{
"epoch": 64.9,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.43918871879577637,
"eval_runtime": 0.4466,
"eval_samples_per_second": 244.06,
"eval_steps_per_second": 8.956,
"step": 503
},
{
"epoch": 65.81,
"learning_rate": 4.96031746031746e-06,
"loss": 0.3647,
"step": 510
},
{
"epoch": 65.94,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.4348970353603363,
"eval_runtime": 0.6039,
"eval_samples_per_second": 180.506,
"eval_steps_per_second": 6.624,
"step": 511
},
{
"epoch": 66.97,
"eval_accuracy": 0.8256880733944955,
"eval_loss": 0.4343608319759369,
"eval_runtime": 0.4394,
"eval_samples_per_second": 248.071,
"eval_steps_per_second": 9.104,
"step": 519
},
{
"epoch": 67.1,
"learning_rate": 3.968253968253968e-06,
"loss": 0.3927,
"step": 520
},
{
"epoch": 68.0,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.43479177355766296,
"eval_runtime": 0.4462,
"eval_samples_per_second": 244.289,
"eval_steps_per_second": 8.965,
"step": 527
},
{
"epoch": 68.39,
"learning_rate": 2.9761904761904763e-06,
"loss": 0.3833,
"step": 530
},
{
"epoch": 68.9,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.4351903796195984,
"eval_runtime": 0.4439,
"eval_samples_per_second": 245.538,
"eval_steps_per_second": 9.011,
"step": 534
},
{
"epoch": 69.68,
"learning_rate": 1.984126984126984e-06,
"loss": 0.3932,
"step": 540
},
{
"epoch": 69.94,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.4294258952140808,
"eval_runtime": 0.4336,
"eval_samples_per_second": 251.361,
"eval_steps_per_second": 9.224,
"step": 542
},
{
"epoch": 70.97,
"learning_rate": 9.92063492063492e-07,
"loss": 0.4085,
"step": 550
},
{
"epoch": 70.97,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.42758503556251526,
"eval_runtime": 0.4574,
"eval_samples_per_second": 238.283,
"eval_steps_per_second": 8.744,
"step": 550
},
{
"epoch": 72.0,
"eval_accuracy": 0.8073394495412844,
"eval_loss": 0.4231923222541809,
"eval_runtime": 0.4385,
"eval_samples_per_second": 248.573,
"eval_steps_per_second": 9.122,
"step": 558
},
{
"epoch": 72.26,
"learning_rate": 0.0,
"loss": 0.4029,
"step": 560
},
{
"epoch": 72.26,
"eval_accuracy": 0.8165137614678899,
"eval_loss": 0.435944139957428,
"eval_runtime": 0.466,
"eval_samples_per_second": 233.911,
"eval_steps_per_second": 8.584,
"step": 560
},
{
"epoch": 72.26,
"step": 560,
"total_flos": 1.937282408715387e+17,
"train_loss": 0.6618133915322167,
"train_runtime": 514.9079,
"train_samples_per_second": 151.95,
"train_steps_per_second": 1.088
}
],
"logging_steps": 10,
"max_steps": 560,
"num_train_epochs": 80,
"save_steps": 500,
"total_flos": 1.937282408715387e+17,
"trial_name": null,
"trial_params": null
}