swiftformer-xs-dmae-va-U5-80C / trainer_state.json
Augusto777's picture
End of training
c456b3b verified
{
"best_metric": 0.8166666666666667,
"best_model_checkpoint": "swiftformer-xs-dmae-va-U5-42C\\checkpoint-418",
"epoch": 72.25806451612904,
"eval_steps": 500,
"global_step": 560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9,
"eval_accuracy": 0.4666666666666667,
"eval_loss": 1.3856309652328491,
"eval_runtime": 1.1043,
"eval_samples_per_second": 54.333,
"eval_steps_per_second": 1.811,
"step": 7
},
{
"epoch": 1.55,
"learning_rate": 4.2857142857142856e-05,
"loss": 1.3855,
"step": 12
},
{
"epoch": 1.94,
"eval_accuracy": 0.48333333333333334,
"eval_loss": 1.3819035291671753,
"eval_runtime": 0.7186,
"eval_samples_per_second": 83.495,
"eval_steps_per_second": 2.783,
"step": 15
},
{
"epoch": 2.97,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.3687236309051514,
"eval_runtime": 0.7101,
"eval_samples_per_second": 84.495,
"eval_steps_per_second": 2.817,
"step": 23
},
{
"epoch": 3.1,
"learning_rate": 8.571428571428571e-05,
"loss": 1.3742,
"step": 24
},
{
"epoch": 4.0,
"eval_accuracy": 0.31666666666666665,
"eval_loss": 1.3188554048538208,
"eval_runtime": 0.726,
"eval_samples_per_second": 82.647,
"eval_steps_per_second": 2.755,
"step": 31
},
{
"epoch": 4.65,
"learning_rate": 0.00012857142857142858,
"loss": 1.3004,
"step": 36
},
{
"epoch": 4.9,
"eval_accuracy": 0.48333333333333334,
"eval_loss": 1.2501040697097778,
"eval_runtime": 0.725,
"eval_samples_per_second": 82.755,
"eval_steps_per_second": 2.759,
"step": 38
},
{
"epoch": 5.94,
"eval_accuracy": 0.48333333333333334,
"eval_loss": 1.2268178462982178,
"eval_runtime": 0.7099,
"eval_samples_per_second": 84.517,
"eval_steps_per_second": 2.817,
"step": 46
},
{
"epoch": 6.19,
"learning_rate": 0.00017142857142857143,
"loss": 1.1716,
"step": 48
},
{
"epoch": 6.97,
"eval_accuracy": 0.5,
"eval_loss": 1.211478352546692,
"eval_runtime": 0.7071,
"eval_samples_per_second": 84.855,
"eval_steps_per_second": 2.828,
"step": 54
},
{
"epoch": 7.74,
"learning_rate": 0.00019841269841269844,
"loss": 1.0686,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.5333333333333333,
"eval_loss": 1.2243305444717407,
"eval_runtime": 0.7126,
"eval_samples_per_second": 84.197,
"eval_steps_per_second": 2.807,
"step": 62
},
{
"epoch": 8.9,
"eval_accuracy": 0.55,
"eval_loss": 1.1432182788848877,
"eval_runtime": 0.7953,
"eval_samples_per_second": 75.447,
"eval_steps_per_second": 2.515,
"step": 69
},
{
"epoch": 9.29,
"learning_rate": 0.00019365079365079365,
"loss": 0.9764,
"step": 72
},
{
"epoch": 9.94,
"eval_accuracy": 0.55,
"eval_loss": 1.020477294921875,
"eval_runtime": 0.7686,
"eval_samples_per_second": 78.061,
"eval_steps_per_second": 2.602,
"step": 77
},
{
"epoch": 10.84,
"learning_rate": 0.00018888888888888888,
"loss": 0.873,
"step": 84
},
{
"epoch": 10.97,
"eval_accuracy": 0.6,
"eval_loss": 0.9721332788467407,
"eval_runtime": 0.8664,
"eval_samples_per_second": 69.254,
"eval_steps_per_second": 2.308,
"step": 85
},
{
"epoch": 12.0,
"eval_accuracy": 0.5666666666666667,
"eval_loss": 0.9220641851425171,
"eval_runtime": 0.7382,
"eval_samples_per_second": 81.278,
"eval_steps_per_second": 2.709,
"step": 93
},
{
"epoch": 12.39,
"learning_rate": 0.00018412698412698412,
"loss": 0.7822,
"step": 96
},
{
"epoch": 12.9,
"eval_accuracy": 0.6166666666666667,
"eval_loss": 0.8593236207962036,
"eval_runtime": 1.1748,
"eval_samples_per_second": 51.071,
"eval_steps_per_second": 1.702,
"step": 100
},
{
"epoch": 13.94,
"learning_rate": 0.00017936507936507938,
"loss": 0.664,
"step": 108
},
{
"epoch": 13.94,
"eval_accuracy": 0.7,
"eval_loss": 0.7774909734725952,
"eval_runtime": 0.8265,
"eval_samples_per_second": 72.595,
"eval_steps_per_second": 2.42,
"step": 108
},
{
"epoch": 14.97,
"eval_accuracy": 0.6166666666666667,
"eval_loss": 0.8117440342903137,
"eval_runtime": 0.873,
"eval_samples_per_second": 68.731,
"eval_steps_per_second": 2.291,
"step": 116
},
{
"epoch": 15.48,
"learning_rate": 0.00017460317460317462,
"loss": 0.5439,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.6833333333333333,
"eval_loss": 0.7552784085273743,
"eval_runtime": 0.8067,
"eval_samples_per_second": 74.373,
"eval_steps_per_second": 2.479,
"step": 124
},
{
"epoch": 16.9,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.669671356678009,
"eval_runtime": 0.7809,
"eval_samples_per_second": 76.836,
"eval_steps_per_second": 2.561,
"step": 131
},
{
"epoch": 17.03,
"learning_rate": 0.00016984126984126986,
"loss": 0.496,
"step": 132
},
{
"epoch": 17.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.6479821801185608,
"eval_runtime": 0.8358,
"eval_samples_per_second": 71.789,
"eval_steps_per_second": 2.393,
"step": 139
},
{
"epoch": 18.58,
"learning_rate": 0.0001650793650793651,
"loss": 0.4563,
"step": 144
},
{
"epoch": 18.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7115061283111572,
"eval_runtime": 0.7213,
"eval_samples_per_second": 83.184,
"eval_steps_per_second": 2.773,
"step": 147
},
{
"epoch": 20.0,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6776978373527527,
"eval_runtime": 1.1968,
"eval_samples_per_second": 50.132,
"eval_steps_per_second": 1.671,
"step": 155
},
{
"epoch": 20.13,
"learning_rate": 0.00016031746031746033,
"loss": 0.3831,
"step": 156
},
{
"epoch": 20.9,
"eval_accuracy": 0.7666666666666667,
"eval_loss": 0.641558825969696,
"eval_runtime": 0.7539,
"eval_samples_per_second": 79.588,
"eval_steps_per_second": 2.653,
"step": 162
},
{
"epoch": 21.68,
"learning_rate": 0.00015555555555555556,
"loss": 0.339,
"step": 168
},
{
"epoch": 21.94,
"eval_accuracy": 0.7,
"eval_loss": 0.7040281891822815,
"eval_runtime": 0.7707,
"eval_samples_per_second": 77.852,
"eval_steps_per_second": 2.595,
"step": 170
},
{
"epoch": 22.97,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6858527064323425,
"eval_runtime": 0.7222,
"eval_samples_per_second": 83.082,
"eval_steps_per_second": 2.769,
"step": 178
},
{
"epoch": 23.23,
"learning_rate": 0.0001507936507936508,
"loss": 0.3033,
"step": 180
},
{
"epoch": 24.0,
"eval_accuracy": 0.7,
"eval_loss": 0.6012035608291626,
"eval_runtime": 0.8804,
"eval_samples_per_second": 68.154,
"eval_steps_per_second": 2.272,
"step": 186
},
{
"epoch": 24.77,
"learning_rate": 0.00014603174603174603,
"loss": 0.2655,
"step": 192
},
{
"epoch": 24.9,
"eval_accuracy": 0.7833333333333333,
"eval_loss": 0.5439515709877014,
"eval_runtime": 0.8773,
"eval_samples_per_second": 68.39,
"eval_steps_per_second": 2.28,
"step": 193
},
{
"epoch": 25.94,
"eval_accuracy": 0.75,
"eval_loss": 0.6173577904701233,
"eval_runtime": 0.755,
"eval_samples_per_second": 79.471,
"eval_steps_per_second": 2.649,
"step": 201
},
{
"epoch": 26.32,
"learning_rate": 0.0001412698412698413,
"loss": 0.2269,
"step": 204
},
{
"epoch": 26.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.5745389461517334,
"eval_runtime": 0.7541,
"eval_samples_per_second": 79.562,
"eval_steps_per_second": 2.652,
"step": 209
},
{
"epoch": 27.87,
"learning_rate": 0.0001365079365079365,
"loss": 0.2472,
"step": 216
},
{
"epoch": 28.0,
"eval_accuracy": 0.8,
"eval_loss": 0.5688391327857971,
"eval_runtime": 0.7386,
"eval_samples_per_second": 81.235,
"eval_steps_per_second": 2.708,
"step": 217
},
{
"epoch": 28.9,
"eval_accuracy": 0.75,
"eval_loss": 0.6578179597854614,
"eval_runtime": 0.7981,
"eval_samples_per_second": 75.174,
"eval_steps_per_second": 2.506,
"step": 224
},
{
"epoch": 29.42,
"learning_rate": 0.00013174603174603174,
"loss": 0.2004,
"step": 228
},
{
"epoch": 29.94,
"eval_accuracy": 0.7833333333333333,
"eval_loss": 0.5811216831207275,
"eval_runtime": 0.7017,
"eval_samples_per_second": 85.512,
"eval_steps_per_second": 2.85,
"step": 232
},
{
"epoch": 30.97,
"learning_rate": 0.00012698412698412698,
"loss": 0.2099,
"step": 240
},
{
"epoch": 30.97,
"eval_accuracy": 0.75,
"eval_loss": 0.6672316193580627,
"eval_runtime": 0.9579,
"eval_samples_per_second": 62.635,
"eval_steps_per_second": 2.088,
"step": 240
},
{
"epoch": 32.0,
"eval_accuracy": 0.75,
"eval_loss": 0.5926868319511414,
"eval_runtime": 0.7151,
"eval_samples_per_second": 83.907,
"eval_steps_per_second": 2.797,
"step": 248
},
{
"epoch": 32.52,
"learning_rate": 0.00012222222222222224,
"loss": 0.1834,
"step": 252
},
{
"epoch": 32.9,
"eval_accuracy": 0.7666666666666667,
"eval_loss": 0.6193079352378845,
"eval_runtime": 0.7137,
"eval_samples_per_second": 84.073,
"eval_steps_per_second": 2.802,
"step": 255
},
{
"epoch": 33.94,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.7505124807357788,
"eval_runtime": 0.8955,
"eval_samples_per_second": 67.004,
"eval_steps_per_second": 2.233,
"step": 263
},
{
"epoch": 34.06,
"learning_rate": 0.00011746031746031746,
"loss": 0.2248,
"step": 264
},
{
"epoch": 34.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7730365991592407,
"eval_runtime": 0.7499,
"eval_samples_per_second": 80.008,
"eval_steps_per_second": 2.667,
"step": 271
},
{
"epoch": 35.61,
"learning_rate": 0.0001126984126984127,
"loss": 0.1571,
"step": 276
},
{
"epoch": 36.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.6211021542549133,
"eval_runtime": 0.9402,
"eval_samples_per_second": 63.814,
"eval_steps_per_second": 2.127,
"step": 279
},
{
"epoch": 36.9,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6227646470069885,
"eval_runtime": 0.747,
"eval_samples_per_second": 80.323,
"eval_steps_per_second": 2.677,
"step": 286
},
{
"epoch": 37.16,
"learning_rate": 0.00010793650793650794,
"loss": 0.1983,
"step": 288
},
{
"epoch": 37.94,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6088116765022278,
"eval_runtime": 0.6721,
"eval_samples_per_second": 89.274,
"eval_steps_per_second": 2.976,
"step": 294
},
{
"epoch": 38.71,
"learning_rate": 0.00010317460317460319,
"loss": 0.1629,
"step": 300
},
{
"epoch": 38.97,
"eval_accuracy": 0.75,
"eval_loss": 0.7009025812149048,
"eval_runtime": 0.6916,
"eval_samples_per_second": 86.756,
"eval_steps_per_second": 2.892,
"step": 302
},
{
"epoch": 40.0,
"eval_accuracy": 0.75,
"eval_loss": 0.7284848690032959,
"eval_runtime": 0.6756,
"eval_samples_per_second": 88.806,
"eval_steps_per_second": 2.96,
"step": 310
},
{
"epoch": 40.26,
"learning_rate": 9.841269841269841e-05,
"loss": 0.1547,
"step": 312
},
{
"epoch": 40.9,
"eval_accuracy": 0.7666666666666667,
"eval_loss": 0.6401079297065735,
"eval_runtime": 1.0498,
"eval_samples_per_second": 57.152,
"eval_steps_per_second": 1.905,
"step": 317
},
{
"epoch": 41.81,
"learning_rate": 9.365079365079366e-05,
"loss": 0.1548,
"step": 324
},
{
"epoch": 41.94,
"eval_accuracy": 0.7833333333333333,
"eval_loss": 0.6122706532478333,
"eval_runtime": 0.7047,
"eval_samples_per_second": 85.142,
"eval_steps_per_second": 2.838,
"step": 325
},
{
"epoch": 42.97,
"eval_accuracy": 0.8,
"eval_loss": 0.6316841244697571,
"eval_runtime": 0.7237,
"eval_samples_per_second": 82.911,
"eval_steps_per_second": 2.764,
"step": 333
},
{
"epoch": 43.35,
"learning_rate": 8.888888888888889e-05,
"loss": 0.1566,
"step": 336
},
{
"epoch": 44.0,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.7579439282417297,
"eval_runtime": 0.6901,
"eval_samples_per_second": 86.944,
"eval_steps_per_second": 2.898,
"step": 341
},
{
"epoch": 44.9,
"learning_rate": 8.412698412698413e-05,
"loss": 0.1361,
"step": 348
},
{
"epoch": 44.9,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6652740836143494,
"eval_runtime": 0.7005,
"eval_samples_per_second": 85.657,
"eval_steps_per_second": 2.855,
"step": 348
},
{
"epoch": 45.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7401434779167175,
"eval_runtime": 0.6837,
"eval_samples_per_second": 87.759,
"eval_steps_per_second": 2.925,
"step": 356
},
{
"epoch": 46.45,
"learning_rate": 7.936507936507937e-05,
"loss": 0.1273,
"step": 360
},
{
"epoch": 46.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.8404071927070618,
"eval_runtime": 0.852,
"eval_samples_per_second": 70.419,
"eval_steps_per_second": 2.347,
"step": 364
},
{
"epoch": 48.0,
"learning_rate": 7.460317460317461e-05,
"loss": 0.1312,
"step": 372
},
{
"epoch": 48.0,
"eval_accuracy": 0.75,
"eval_loss": 0.8388133645057678,
"eval_runtime": 0.7625,
"eval_samples_per_second": 78.691,
"eval_steps_per_second": 2.623,
"step": 372
},
{
"epoch": 48.9,
"eval_accuracy": 0.7666666666666667,
"eval_loss": 0.7823358774185181,
"eval_runtime": 0.7984,
"eval_samples_per_second": 75.152,
"eval_steps_per_second": 2.505,
"step": 379
},
{
"epoch": 49.55,
"learning_rate": 6.984126984126984e-05,
"loss": 0.1307,
"step": 384
},
{
"epoch": 49.94,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.6979826092720032,
"eval_runtime": 0.9296,
"eval_samples_per_second": 64.545,
"eval_steps_per_second": 2.151,
"step": 387
},
{
"epoch": 50.97,
"eval_accuracy": 0.75,
"eval_loss": 0.7589060664176941,
"eval_runtime": 0.9421,
"eval_samples_per_second": 63.685,
"eval_steps_per_second": 2.123,
"step": 395
},
{
"epoch": 51.1,
"learning_rate": 6.507936507936509e-05,
"loss": 0.1061,
"step": 396
},
{
"epoch": 52.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.664362907409668,
"eval_runtime": 0.9581,
"eval_samples_per_second": 62.625,
"eval_steps_per_second": 2.088,
"step": 403
},
{
"epoch": 52.65,
"learning_rate": 6.0317460317460316e-05,
"loss": 0.1186,
"step": 408
},
{
"epoch": 52.9,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7056966423988342,
"eval_runtime": 0.7627,
"eval_samples_per_second": 78.67,
"eval_steps_per_second": 2.622,
"step": 410
},
{
"epoch": 53.94,
"eval_accuracy": 0.8166666666666667,
"eval_loss": 0.6744123697280884,
"eval_runtime": 0.7522,
"eval_samples_per_second": 79.767,
"eval_steps_per_second": 2.659,
"step": 418
},
{
"epoch": 54.19,
"learning_rate": 5.555555555555556e-05,
"loss": 0.1108,
"step": 420
},
{
"epoch": 54.97,
"eval_accuracy": 0.7666666666666667,
"eval_loss": 0.6327721476554871,
"eval_runtime": 0.7919,
"eval_samples_per_second": 75.763,
"eval_steps_per_second": 2.525,
"step": 426
},
{
"epoch": 55.74,
"learning_rate": 5.0793650793650794e-05,
"loss": 0.1014,
"step": 432
},
{
"epoch": 56.0,
"eval_accuracy": 0.7833333333333333,
"eval_loss": 0.6401543021202087,
"eval_runtime": 0.7047,
"eval_samples_per_second": 85.145,
"eval_steps_per_second": 2.838,
"step": 434
},
{
"epoch": 56.9,
"eval_accuracy": 0.75,
"eval_loss": 0.6631258726119995,
"eval_runtime": 0.7567,
"eval_samples_per_second": 79.292,
"eval_steps_per_second": 2.643,
"step": 441
},
{
"epoch": 57.29,
"learning_rate": 4.603174603174603e-05,
"loss": 0.1082,
"step": 444
},
{
"epoch": 57.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7001372575759888,
"eval_runtime": 0.7863,
"eval_samples_per_second": 76.308,
"eval_steps_per_second": 2.544,
"step": 449
},
{
"epoch": 58.84,
"learning_rate": 4.126984126984127e-05,
"loss": 0.1118,
"step": 456
},
{
"epoch": 58.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7898235321044922,
"eval_runtime": 0.8339,
"eval_samples_per_second": 71.951,
"eval_steps_per_second": 2.398,
"step": 457
},
{
"epoch": 60.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7644184231758118,
"eval_runtime": 0.916,
"eval_samples_per_second": 65.499,
"eval_steps_per_second": 2.183,
"step": 465
},
{
"epoch": 60.39,
"learning_rate": 3.650793650793651e-05,
"loss": 0.1051,
"step": 468
},
{
"epoch": 60.9,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7767077088356018,
"eval_runtime": 0.7368,
"eval_samples_per_second": 81.432,
"eval_steps_per_second": 2.714,
"step": 472
},
{
"epoch": 61.94,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.0979,
"step": 480
},
{
"epoch": 61.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7440354824066162,
"eval_runtime": 0.8613,
"eval_samples_per_second": 69.665,
"eval_steps_per_second": 2.322,
"step": 480
},
{
"epoch": 62.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.6826977133750916,
"eval_runtime": 0.8727,
"eval_samples_per_second": 68.75,
"eval_steps_per_second": 2.292,
"step": 488
},
{
"epoch": 63.48,
"learning_rate": 2.6984126984126984e-05,
"loss": 0.0834,
"step": 492
},
{
"epoch": 64.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7008056640625,
"eval_runtime": 1.0337,
"eval_samples_per_second": 58.046,
"eval_steps_per_second": 1.935,
"step": 496
},
{
"epoch": 64.9,
"eval_accuracy": 0.7166666666666667,
"eval_loss": 0.7242893576622009,
"eval_runtime": 0.9008,
"eval_samples_per_second": 66.608,
"eval_steps_per_second": 2.22,
"step": 503
},
{
"epoch": 65.03,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0963,
"step": 504
},
{
"epoch": 65.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7655950784683228,
"eval_runtime": 0.7846,
"eval_samples_per_second": 76.471,
"eval_steps_per_second": 2.549,
"step": 511
},
{
"epoch": 66.58,
"learning_rate": 1.746031746031746e-05,
"loss": 0.0989,
"step": 516
},
{
"epoch": 66.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.733224093914032,
"eval_runtime": 0.6942,
"eval_samples_per_second": 86.424,
"eval_steps_per_second": 2.881,
"step": 519
},
{
"epoch": 68.0,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7623672485351562,
"eval_runtime": 0.6962,
"eval_samples_per_second": 86.187,
"eval_steps_per_second": 2.873,
"step": 527
},
{
"epoch": 68.13,
"learning_rate": 1.2698412698412699e-05,
"loss": 0.107,
"step": 528
},
{
"epoch": 68.9,
"eval_accuracy": 0.75,
"eval_loss": 0.7291642427444458,
"eval_runtime": 0.8099,
"eval_samples_per_second": 74.08,
"eval_steps_per_second": 2.469,
"step": 534
},
{
"epoch": 69.68,
"learning_rate": 7.936507936507936e-06,
"loss": 0.0987,
"step": 540
},
{
"epoch": 69.94,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.7168775796890259,
"eval_runtime": 0.9171,
"eval_samples_per_second": 65.421,
"eval_steps_per_second": 2.181,
"step": 542
},
{
"epoch": 70.97,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.746231734752655,
"eval_runtime": 0.7381,
"eval_samples_per_second": 81.293,
"eval_steps_per_second": 2.71,
"step": 550
},
{
"epoch": 71.23,
"learning_rate": 3.1746031746031746e-06,
"loss": 0.0956,
"step": 552
},
{
"epoch": 72.0,
"eval_accuracy": 0.75,
"eval_loss": 0.6656435132026672,
"eval_runtime": 0.7048,
"eval_samples_per_second": 85.132,
"eval_steps_per_second": 2.838,
"step": 558
},
{
"epoch": 72.26,
"eval_accuracy": 0.7333333333333333,
"eval_loss": 0.6873000860214233,
"eval_runtime": 0.744,
"eval_samples_per_second": 80.641,
"eval_steps_per_second": 2.688,
"step": 560
},
{
"epoch": 72.26,
"step": 560,
"total_flos": 1.9293876649171354e+17,
"train_loss": 0.3574366893087115,
"train_runtime": 357.0027,
"train_samples_per_second": 218.262,
"train_steps_per_second": 1.569
}
],
"logging_steps": 12,
"max_steps": 560,
"num_input_tokens_seen": 0,
"num_train_epochs": 80,
"save_steps": 500,
"total_flos": 1.9293876649171354e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}