Niraya666's picture
End of training
aee3631
{
"best_metric": 0.8285714285714286,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-3cls-0922/checkpoint-40",
"epoch": 200.0,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6875032186508179,
"eval_runtime": 0.8381,
"eval_samples_per_second": 83.526,
"eval_steps_per_second": 2.386,
"step": 2
},
{
"epoch": 2.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6874324083328247,
"eval_runtime": 0.6285,
"eval_samples_per_second": 111.384,
"eval_steps_per_second": 3.182,
"step": 4
},
{
"epoch": 3.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6873045563697815,
"eval_runtime": 0.6533,
"eval_samples_per_second": 107.153,
"eval_steps_per_second": 3.062,
"step": 6
},
{
"epoch": 4.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.687107264995575,
"eval_runtime": 0.8041,
"eval_samples_per_second": 87.057,
"eval_steps_per_second": 2.487,
"step": 8
},
{
"epoch": 5.0,
"learning_rate": 1.25e-05,
"loss": 0.7555,
"step": 10
},
{
"epoch": 5.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6868652701377869,
"eval_runtime": 0.6585,
"eval_samples_per_second": 106.309,
"eval_steps_per_second": 3.037,
"step": 10
},
{
"epoch": 6.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.686565101146698,
"eval_runtime": 0.6372,
"eval_samples_per_second": 109.86,
"eval_steps_per_second": 3.139,
"step": 12
},
{
"epoch": 7.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6861968636512756,
"eval_runtime": 0.8401,
"eval_samples_per_second": 83.326,
"eval_steps_per_second": 2.381,
"step": 14
},
{
"epoch": 8.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.685771644115448,
"eval_runtime": 0.6402,
"eval_samples_per_second": 109.344,
"eval_steps_per_second": 3.124,
"step": 16
},
{
"epoch": 9.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6853042244911194,
"eval_runtime": 0.638,
"eval_samples_per_second": 109.711,
"eval_steps_per_second": 3.135,
"step": 18
},
{
"epoch": 10.0,
"learning_rate": 2.5e-05,
"loss": 0.7576,
"step": 20
},
{
"epoch": 10.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6847913861274719,
"eval_runtime": 0.8284,
"eval_samples_per_second": 84.496,
"eval_steps_per_second": 2.414,
"step": 20
},
{
"epoch": 11.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6842377185821533,
"eval_runtime": 0.6408,
"eval_samples_per_second": 109.237,
"eval_steps_per_second": 3.121,
"step": 22
},
{
"epoch": 12.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6836268901824951,
"eval_runtime": 0.6496,
"eval_samples_per_second": 107.755,
"eval_steps_per_second": 3.079,
"step": 24
},
{
"epoch": 13.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6829591393470764,
"eval_runtime": 0.8145,
"eval_samples_per_second": 85.938,
"eval_steps_per_second": 2.455,
"step": 26
},
{
"epoch": 14.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6822755336761475,
"eval_runtime": 0.6641,
"eval_samples_per_second": 105.412,
"eval_steps_per_second": 3.012,
"step": 28
},
{
"epoch": 15.0,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.769,
"step": 30
},
{
"epoch": 15.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6815804839134216,
"eval_runtime": 0.6278,
"eval_samples_per_second": 111.502,
"eval_steps_per_second": 3.186,
"step": 30
},
{
"epoch": 16.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6808401346206665,
"eval_runtime": 0.8247,
"eval_samples_per_second": 84.88,
"eval_steps_per_second": 2.425,
"step": 32
},
{
"epoch": 17.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6800239086151123,
"eval_runtime": 0.6376,
"eval_samples_per_second": 109.794,
"eval_steps_per_second": 3.137,
"step": 34
},
{
"epoch": 18.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.679133951663971,
"eval_runtime": 0.6356,
"eval_samples_per_second": 110.128,
"eval_steps_per_second": 3.147,
"step": 36
},
{
"epoch": 19.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6781331896781921,
"eval_runtime": 0.8136,
"eval_samples_per_second": 86.039,
"eval_steps_per_second": 2.458,
"step": 38
},
{
"epoch": 20.0,
"learning_rate": 5e-05,
"loss": 0.7564,
"step": 40
},
{
"epoch": 20.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6770716309547424,
"eval_runtime": 0.627,
"eval_samples_per_second": 111.643,
"eval_steps_per_second": 3.19,
"step": 40
},
{
"epoch": 21.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6759592294692993,
"eval_runtime": 0.6244,
"eval_samples_per_second": 112.113,
"eval_steps_per_second": 3.203,
"step": 42
},
{
"epoch": 22.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.674824059009552,
"eval_runtime": 0.72,
"eval_samples_per_second": 97.226,
"eval_steps_per_second": 2.778,
"step": 44
},
{
"epoch": 23.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6736522912979126,
"eval_runtime": 0.6356,
"eval_samples_per_second": 110.125,
"eval_steps_per_second": 3.146,
"step": 46
},
{
"epoch": 24.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6724562644958496,
"eval_runtime": 0.6465,
"eval_samples_per_second": 108.268,
"eval_steps_per_second": 3.093,
"step": 48
},
{
"epoch": 25.0,
"learning_rate": 6.25e-05,
"loss": 0.7508,
"step": 50
},
{
"epoch": 25.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6713314056396484,
"eval_runtime": 0.6458,
"eval_samples_per_second": 108.385,
"eval_steps_per_second": 3.097,
"step": 50
},
{
"epoch": 26.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6701393723487854,
"eval_runtime": 0.7532,
"eval_samples_per_second": 92.934,
"eval_steps_per_second": 2.655,
"step": 52
},
{
"epoch": 27.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6688514947891235,
"eval_runtime": 0.6275,
"eval_samples_per_second": 111.546,
"eval_steps_per_second": 3.187,
"step": 54
},
{
"epoch": 28.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6674489378929138,
"eval_runtime": 0.6455,
"eval_samples_per_second": 108.446,
"eval_steps_per_second": 3.098,
"step": 56
},
{
"epoch": 29.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6660061478614807,
"eval_runtime": 0.7926,
"eval_samples_per_second": 88.312,
"eval_steps_per_second": 2.523,
"step": 58
},
{
"epoch": 30.0,
"learning_rate": 7.500000000000001e-05,
"loss": 0.747,
"step": 60
},
{
"epoch": 30.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6645620465278625,
"eval_runtime": 0.6238,
"eval_samples_per_second": 112.214,
"eval_steps_per_second": 3.206,
"step": 60
},
{
"epoch": 31.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6631242632865906,
"eval_runtime": 0.651,
"eval_samples_per_second": 107.52,
"eval_steps_per_second": 3.072,
"step": 62
},
{
"epoch": 32.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6616196036338806,
"eval_runtime": 0.8036,
"eval_samples_per_second": 87.111,
"eval_steps_per_second": 2.489,
"step": 64
},
{
"epoch": 33.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6600926518440247,
"eval_runtime": 0.638,
"eval_samples_per_second": 109.722,
"eval_steps_per_second": 3.135,
"step": 66
},
{
"epoch": 34.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6585766673088074,
"eval_runtime": 0.6365,
"eval_samples_per_second": 109.973,
"eval_steps_per_second": 3.142,
"step": 68
},
{
"epoch": 35.0,
"learning_rate": 8.75e-05,
"loss": 0.7343,
"step": 70
},
{
"epoch": 35.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6569960117340088,
"eval_runtime": 0.7913,
"eval_samples_per_second": 88.467,
"eval_steps_per_second": 2.528,
"step": 70
},
{
"epoch": 36.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6553293466567993,
"eval_runtime": 0.6317,
"eval_samples_per_second": 110.815,
"eval_steps_per_second": 3.166,
"step": 72
},
{
"epoch": 37.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6535871028900146,
"eval_runtime": 0.6261,
"eval_samples_per_second": 111.81,
"eval_steps_per_second": 3.195,
"step": 74
},
{
"epoch": 38.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6517333388328552,
"eval_runtime": 0.7801,
"eval_samples_per_second": 89.727,
"eval_steps_per_second": 2.564,
"step": 76
},
{
"epoch": 39.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6498710513114929,
"eval_runtime": 0.6688,
"eval_samples_per_second": 104.667,
"eval_steps_per_second": 2.99,
"step": 78
},
{
"epoch": 40.0,
"learning_rate": 0.0001,
"loss": 0.7532,
"step": 80
},
{
"epoch": 40.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6480462551116943,
"eval_runtime": 0.6422,
"eval_samples_per_second": 108.998,
"eval_steps_per_second": 3.114,
"step": 80
},
{
"epoch": 41.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6461040377616882,
"eval_runtime": 0.7878,
"eval_samples_per_second": 88.86,
"eval_steps_per_second": 2.539,
"step": 82
},
{
"epoch": 42.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6441839337348938,
"eval_runtime": 0.6221,
"eval_samples_per_second": 112.518,
"eval_steps_per_second": 3.215,
"step": 84
},
{
"epoch": 43.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6423068046569824,
"eval_runtime": 0.6404,
"eval_samples_per_second": 109.306,
"eval_steps_per_second": 3.123,
"step": 86
},
{
"epoch": 44.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6404834985733032,
"eval_runtime": 0.8194,
"eval_samples_per_second": 85.431,
"eval_steps_per_second": 2.441,
"step": 88
},
{
"epoch": 45.0,
"learning_rate": 9.687500000000001e-05,
"loss": 0.7239,
"step": 90
},
{
"epoch": 45.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.638668417930603,
"eval_runtime": 0.6293,
"eval_samples_per_second": 111.227,
"eval_steps_per_second": 3.178,
"step": 90
},
{
"epoch": 46.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6368482112884521,
"eval_runtime": 0.6307,
"eval_samples_per_second": 110.981,
"eval_steps_per_second": 3.171,
"step": 92
},
{
"epoch": 47.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6351889967918396,
"eval_runtime": 0.8243,
"eval_samples_per_second": 84.921,
"eval_steps_per_second": 2.426,
"step": 94
},
{
"epoch": 48.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6336590051651001,
"eval_runtime": 0.6325,
"eval_samples_per_second": 110.664,
"eval_steps_per_second": 3.162,
"step": 96
},
{
"epoch": 49.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6321325302124023,
"eval_runtime": 0.6292,
"eval_samples_per_second": 111.258,
"eval_steps_per_second": 3.179,
"step": 98
},
{
"epoch": 50.0,
"learning_rate": 9.375e-05,
"loss": 0.7085,
"step": 100
},
{
"epoch": 50.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6307134628295898,
"eval_runtime": 0.8147,
"eval_samples_per_second": 85.924,
"eval_steps_per_second": 2.455,
"step": 100
},
{
"epoch": 51.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6293519139289856,
"eval_runtime": 0.6273,
"eval_samples_per_second": 111.588,
"eval_steps_per_second": 3.188,
"step": 102
},
{
"epoch": 52.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6278188228607178,
"eval_runtime": 0.6366,
"eval_samples_per_second": 109.96,
"eval_steps_per_second": 3.142,
"step": 104
},
{
"epoch": 53.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.6263061165809631,
"eval_runtime": 0.8106,
"eval_samples_per_second": 86.353,
"eval_steps_per_second": 2.467,
"step": 106
},
{
"epoch": 54.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6247809529304504,
"eval_runtime": 0.637,
"eval_samples_per_second": 109.885,
"eval_steps_per_second": 3.14,
"step": 108
},
{
"epoch": 55.0,
"learning_rate": 9.062500000000001e-05,
"loss": 0.7203,
"step": 110
},
{
"epoch": 55.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6232935190200806,
"eval_runtime": 0.6312,
"eval_samples_per_second": 110.901,
"eval_steps_per_second": 3.169,
"step": 110
},
{
"epoch": 56.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6218679547309875,
"eval_runtime": 0.8253,
"eval_samples_per_second": 84.819,
"eval_steps_per_second": 2.423,
"step": 112
},
{
"epoch": 57.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6204643845558167,
"eval_runtime": 0.6393,
"eval_samples_per_second": 109.495,
"eval_steps_per_second": 3.128,
"step": 114
},
{
"epoch": 58.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6191075444221497,
"eval_runtime": 0.6278,
"eval_samples_per_second": 111.495,
"eval_steps_per_second": 3.186,
"step": 116
},
{
"epoch": 59.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6178752779960632,
"eval_runtime": 0.8233,
"eval_samples_per_second": 85.027,
"eval_steps_per_second": 2.429,
"step": 118
},
{
"epoch": 60.0,
"learning_rate": 8.75e-05,
"loss": 0.7136,
"step": 120
},
{
"epoch": 60.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6167242527008057,
"eval_runtime": 0.6489,
"eval_samples_per_second": 107.881,
"eval_steps_per_second": 3.082,
"step": 120
},
{
"epoch": 61.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6156985759735107,
"eval_runtime": 0.6472,
"eval_samples_per_second": 108.154,
"eval_steps_per_second": 3.09,
"step": 122
},
{
"epoch": 62.0,
"eval_accuracy": 0.8,
"eval_loss": 0.61481112241745,
"eval_runtime": 0.8228,
"eval_samples_per_second": 85.073,
"eval_steps_per_second": 2.431,
"step": 124
},
{
"epoch": 63.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6138356328010559,
"eval_runtime": 0.6327,
"eval_samples_per_second": 110.64,
"eval_steps_per_second": 3.161,
"step": 126
},
{
"epoch": 64.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6125301122665405,
"eval_runtime": 0.6379,
"eval_samples_per_second": 109.736,
"eval_steps_per_second": 3.135,
"step": 128
},
{
"epoch": 65.0,
"learning_rate": 8.4375e-05,
"loss": 0.7123,
"step": 130
},
{
"epoch": 65.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6110576391220093,
"eval_runtime": 0.825,
"eval_samples_per_second": 84.849,
"eval_steps_per_second": 2.424,
"step": 130
},
{
"epoch": 66.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6096405982971191,
"eval_runtime": 0.6376,
"eval_samples_per_second": 109.782,
"eval_steps_per_second": 3.137,
"step": 132
},
{
"epoch": 67.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6083278656005859,
"eval_runtime": 0.8232,
"eval_samples_per_second": 85.037,
"eval_steps_per_second": 2.43,
"step": 134
},
{
"epoch": 68.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6069909930229187,
"eval_runtime": 0.8193,
"eval_samples_per_second": 85.437,
"eval_steps_per_second": 2.441,
"step": 136
},
{
"epoch": 69.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6057179570198059,
"eval_runtime": 0.6486,
"eval_samples_per_second": 107.925,
"eval_steps_per_second": 3.084,
"step": 138
},
{
"epoch": 70.0,
"learning_rate": 8.125000000000001e-05,
"loss": 0.7076,
"step": 140
},
{
"epoch": 70.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.604619562625885,
"eval_runtime": 0.6358,
"eval_samples_per_second": 110.095,
"eval_steps_per_second": 3.146,
"step": 140
},
{
"epoch": 71.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6034784913063049,
"eval_runtime": 0.8201,
"eval_samples_per_second": 85.352,
"eval_steps_per_second": 2.439,
"step": 142
},
{
"epoch": 72.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6022736430168152,
"eval_runtime": 0.6311,
"eval_samples_per_second": 110.91,
"eval_steps_per_second": 3.169,
"step": 144
},
{
"epoch": 73.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.6011058688163757,
"eval_runtime": 0.6566,
"eval_samples_per_second": 106.607,
"eval_steps_per_second": 3.046,
"step": 146
},
{
"epoch": 74.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5999324917793274,
"eval_runtime": 0.8174,
"eval_samples_per_second": 85.64,
"eval_steps_per_second": 2.447,
"step": 148
},
{
"epoch": 75.0,
"learning_rate": 7.8125e-05,
"loss": 0.6878,
"step": 150
},
{
"epoch": 75.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5987647175788879,
"eval_runtime": 0.6275,
"eval_samples_per_second": 111.562,
"eval_steps_per_second": 3.187,
"step": 150
},
{
"epoch": 76.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5975351333618164,
"eval_runtime": 0.6296,
"eval_samples_per_second": 111.178,
"eval_steps_per_second": 3.177,
"step": 152
},
{
"epoch": 77.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5963953137397766,
"eval_runtime": 0.8044,
"eval_samples_per_second": 87.018,
"eval_steps_per_second": 2.486,
"step": 154
},
{
"epoch": 78.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5952684879302979,
"eval_runtime": 0.6501,
"eval_samples_per_second": 107.669,
"eval_steps_per_second": 3.076,
"step": 156
},
{
"epoch": 79.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5942099094390869,
"eval_runtime": 0.6469,
"eval_samples_per_second": 108.203,
"eval_steps_per_second": 3.092,
"step": 158
},
{
"epoch": 80.0,
"learning_rate": 7.500000000000001e-05,
"loss": 0.6657,
"step": 160
},
{
"epoch": 80.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5932222008705139,
"eval_runtime": 0.8259,
"eval_samples_per_second": 84.754,
"eval_steps_per_second": 2.422,
"step": 160
},
{
"epoch": 81.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5923032760620117,
"eval_runtime": 0.6393,
"eval_samples_per_second": 109.49,
"eval_steps_per_second": 3.128,
"step": 162
},
{
"epoch": 82.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5914328098297119,
"eval_runtime": 0.6466,
"eval_samples_per_second": 108.262,
"eval_steps_per_second": 3.093,
"step": 164
},
{
"epoch": 83.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5905909538269043,
"eval_runtime": 0.8278,
"eval_samples_per_second": 84.56,
"eval_steps_per_second": 2.416,
"step": 166
},
{
"epoch": 84.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5897351503372192,
"eval_runtime": 0.6485,
"eval_samples_per_second": 107.935,
"eval_steps_per_second": 3.084,
"step": 168
},
{
"epoch": 85.0,
"learning_rate": 7.1875e-05,
"loss": 0.6434,
"step": 170
},
{
"epoch": 85.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.588803768157959,
"eval_runtime": 0.6407,
"eval_samples_per_second": 109.255,
"eval_steps_per_second": 3.122,
"step": 170
},
{
"epoch": 86.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5878075361251831,
"eval_runtime": 0.7846,
"eval_samples_per_second": 89.216,
"eval_steps_per_second": 2.549,
"step": 172
},
{
"epoch": 87.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5868256688117981,
"eval_runtime": 0.6427,
"eval_samples_per_second": 108.917,
"eval_steps_per_second": 3.112,
"step": 174
},
{
"epoch": 88.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5859082341194153,
"eval_runtime": 0.6384,
"eval_samples_per_second": 109.65,
"eval_steps_per_second": 3.133,
"step": 176
},
{
"epoch": 89.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5850787162780762,
"eval_runtime": 0.7009,
"eval_samples_per_second": 99.878,
"eval_steps_per_second": 2.854,
"step": 178
},
{
"epoch": 90.0,
"learning_rate": 6.875e-05,
"loss": 0.6825,
"step": 180
},
{
"epoch": 90.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5843265652656555,
"eval_runtime": 0.6343,
"eval_samples_per_second": 110.361,
"eval_steps_per_second": 3.153,
"step": 180
},
{
"epoch": 91.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5835766792297363,
"eval_runtime": 0.645,
"eval_samples_per_second": 108.529,
"eval_steps_per_second": 3.101,
"step": 182
},
{
"epoch": 92.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5828419923782349,
"eval_runtime": 0.6414,
"eval_samples_per_second": 109.129,
"eval_steps_per_second": 3.118,
"step": 184
},
{
"epoch": 93.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5822591781616211,
"eval_runtime": 0.6506,
"eval_samples_per_second": 107.585,
"eval_steps_per_second": 3.074,
"step": 186
},
{
"epoch": 94.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5817149877548218,
"eval_runtime": 0.6481,
"eval_samples_per_second": 108.003,
"eval_steps_per_second": 3.086,
"step": 188
},
{
"epoch": 95.0,
"learning_rate": 6.562500000000001e-05,
"loss": 0.6695,
"step": 190
},
{
"epoch": 95.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5809342265129089,
"eval_runtime": 0.6426,
"eval_samples_per_second": 108.939,
"eval_steps_per_second": 3.113,
"step": 190
},
{
"epoch": 96.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5801157355308533,
"eval_runtime": 0.7408,
"eval_samples_per_second": 94.487,
"eval_steps_per_second": 2.7,
"step": 192
},
{
"epoch": 97.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5793442130088806,
"eval_runtime": 0.6328,
"eval_samples_per_second": 110.628,
"eval_steps_per_second": 3.161,
"step": 194
},
{
"epoch": 98.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5787318348884583,
"eval_runtime": 0.6404,
"eval_samples_per_second": 109.309,
"eval_steps_per_second": 3.123,
"step": 196
},
{
"epoch": 99.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5780039429664612,
"eval_runtime": 0.7894,
"eval_samples_per_second": 88.678,
"eval_steps_per_second": 2.534,
"step": 198
},
{
"epoch": 100.0,
"learning_rate": 6.25e-05,
"loss": 0.6672,
"step": 200
},
{
"epoch": 100.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5772114992141724,
"eval_runtime": 0.6485,
"eval_samples_per_second": 107.942,
"eval_steps_per_second": 3.084,
"step": 200
},
{
"epoch": 101.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5762485265731812,
"eval_runtime": 0.632,
"eval_samples_per_second": 110.757,
"eval_steps_per_second": 3.164,
"step": 202
},
{
"epoch": 102.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5753609538078308,
"eval_runtime": 0.8156,
"eval_samples_per_second": 85.824,
"eval_steps_per_second": 2.452,
"step": 204
},
{
"epoch": 103.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5745884776115417,
"eval_runtime": 0.641,
"eval_samples_per_second": 109.197,
"eval_steps_per_second": 3.12,
"step": 206
},
{
"epoch": 104.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.573843777179718,
"eval_runtime": 0.64,
"eval_samples_per_second": 109.374,
"eval_steps_per_second": 3.125,
"step": 208
},
{
"epoch": 105.0,
"learning_rate": 5.9375e-05,
"loss": 0.6569,
"step": 210
},
{
"epoch": 105.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5731338858604431,
"eval_runtime": 0.8165,
"eval_samples_per_second": 85.735,
"eval_steps_per_second": 2.45,
"step": 210
},
{
"epoch": 106.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5723776817321777,
"eval_runtime": 0.6448,
"eval_samples_per_second": 108.558,
"eval_steps_per_second": 3.102,
"step": 212
},
{
"epoch": 107.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5715596675872803,
"eval_runtime": 0.6552,
"eval_samples_per_second": 106.837,
"eval_steps_per_second": 3.052,
"step": 214
},
{
"epoch": 108.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5707866549491882,
"eval_runtime": 0.7955,
"eval_samples_per_second": 87.991,
"eval_steps_per_second": 2.514,
"step": 216
},
{
"epoch": 109.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.570074737071991,
"eval_runtime": 0.6364,
"eval_samples_per_second": 109.993,
"eval_steps_per_second": 3.143,
"step": 218
},
{
"epoch": 110.0,
"learning_rate": 5.6250000000000005e-05,
"loss": 0.6748,
"step": 220
},
{
"epoch": 110.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5693923830986023,
"eval_runtime": 0.6356,
"eval_samples_per_second": 110.138,
"eval_steps_per_second": 3.147,
"step": 220
},
{
"epoch": 111.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5686994791030884,
"eval_runtime": 0.8207,
"eval_samples_per_second": 85.298,
"eval_steps_per_second": 2.437,
"step": 222
},
{
"epoch": 112.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5680269598960876,
"eval_runtime": 0.6498,
"eval_samples_per_second": 107.722,
"eval_steps_per_second": 3.078,
"step": 224
},
{
"epoch": 113.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5673888325691223,
"eval_runtime": 0.6711,
"eval_samples_per_second": 104.299,
"eval_steps_per_second": 2.98,
"step": 226
},
{
"epoch": 114.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5668244957923889,
"eval_runtime": 0.8599,
"eval_samples_per_second": 81.4,
"eval_steps_per_second": 2.326,
"step": 228
},
{
"epoch": 115.0,
"learning_rate": 5.3125000000000004e-05,
"loss": 0.6388,
"step": 230
},
{
"epoch": 115.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.56624835729599,
"eval_runtime": 0.639,
"eval_samples_per_second": 109.549,
"eval_steps_per_second": 3.13,
"step": 230
},
{
"epoch": 116.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5657045841217041,
"eval_runtime": 0.6452,
"eval_samples_per_second": 108.495,
"eval_steps_per_second": 3.1,
"step": 232
},
{
"epoch": 117.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5652384757995605,
"eval_runtime": 0.8319,
"eval_samples_per_second": 84.146,
"eval_steps_per_second": 2.404,
"step": 234
},
{
"epoch": 118.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5648259520530701,
"eval_runtime": 0.6475,
"eval_samples_per_second": 108.103,
"eval_steps_per_second": 3.089,
"step": 236
},
{
"epoch": 119.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5644696354866028,
"eval_runtime": 0.6531,
"eval_samples_per_second": 107.184,
"eval_steps_per_second": 3.062,
"step": 238
},
{
"epoch": 120.0,
"learning_rate": 5e-05,
"loss": 0.6551,
"step": 240
},
{
"epoch": 120.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5640624165534973,
"eval_runtime": 0.8277,
"eval_samples_per_second": 84.574,
"eval_steps_per_second": 2.416,
"step": 240
},
{
"epoch": 121.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5636399388313293,
"eval_runtime": 0.6478,
"eval_samples_per_second": 108.056,
"eval_steps_per_second": 3.087,
"step": 242
},
{
"epoch": 122.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.563149094581604,
"eval_runtime": 0.6338,
"eval_samples_per_second": 110.453,
"eval_steps_per_second": 3.156,
"step": 244
},
{
"epoch": 123.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5627174973487854,
"eval_runtime": 0.8111,
"eval_samples_per_second": 86.304,
"eval_steps_per_second": 2.466,
"step": 246
},
{
"epoch": 124.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.562400221824646,
"eval_runtime": 0.649,
"eval_samples_per_second": 107.86,
"eval_steps_per_second": 3.082,
"step": 248
},
{
"epoch": 125.0,
"learning_rate": 4.6875e-05,
"loss": 0.6452,
"step": 250
},
{
"epoch": 125.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5621911287307739,
"eval_runtime": 0.6465,
"eval_samples_per_second": 108.279,
"eval_steps_per_second": 3.094,
"step": 250
},
{
"epoch": 126.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5620221495628357,
"eval_runtime": 0.827,
"eval_samples_per_second": 84.639,
"eval_steps_per_second": 2.418,
"step": 252
},
{
"epoch": 127.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5617978572845459,
"eval_runtime": 0.6221,
"eval_samples_per_second": 112.525,
"eval_steps_per_second": 3.215,
"step": 254
},
{
"epoch": 128.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5614616870880127,
"eval_runtime": 0.6384,
"eval_samples_per_second": 109.65,
"eval_steps_per_second": 3.133,
"step": 256
},
{
"epoch": 129.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5612771511077881,
"eval_runtime": 0.8188,
"eval_samples_per_second": 85.487,
"eval_steps_per_second": 2.442,
"step": 258
},
{
"epoch": 130.0,
"learning_rate": 4.375e-05,
"loss": 0.645,
"step": 260
},
{
"epoch": 130.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5610944032669067,
"eval_runtime": 0.6274,
"eval_samples_per_second": 111.57,
"eval_steps_per_second": 3.188,
"step": 260
},
{
"epoch": 131.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5608205199241638,
"eval_runtime": 0.6351,
"eval_samples_per_second": 110.223,
"eval_steps_per_second": 3.149,
"step": 262
},
{
"epoch": 132.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5606086254119873,
"eval_runtime": 0.8451,
"eval_samples_per_second": 82.832,
"eval_steps_per_second": 2.367,
"step": 264
},
{
"epoch": 133.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5602155923843384,
"eval_runtime": 0.6314,
"eval_samples_per_second": 110.864,
"eval_steps_per_second": 3.168,
"step": 266
},
{
"epoch": 134.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5596277713775635,
"eval_runtime": 0.6347,
"eval_samples_per_second": 110.28,
"eval_steps_per_second": 3.151,
"step": 268
},
{
"epoch": 135.0,
"learning_rate": 4.0625000000000005e-05,
"loss": 0.629,
"step": 270
},
{
"epoch": 135.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.558956503868103,
"eval_runtime": 0.8112,
"eval_samples_per_second": 86.289,
"eval_steps_per_second": 2.465,
"step": 270
},
{
"epoch": 136.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5582412481307983,
"eval_runtime": 0.6394,
"eval_samples_per_second": 109.485,
"eval_steps_per_second": 3.128,
"step": 272
},
{
"epoch": 137.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5576009750366211,
"eval_runtime": 0.6293,
"eval_samples_per_second": 111.232,
"eval_steps_per_second": 3.178,
"step": 274
},
{
"epoch": 138.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5571399927139282,
"eval_runtime": 0.8108,
"eval_samples_per_second": 86.33,
"eval_steps_per_second": 2.467,
"step": 276
},
{
"epoch": 139.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5567926168441772,
"eval_runtime": 0.6262,
"eval_samples_per_second": 111.788,
"eval_steps_per_second": 3.194,
"step": 278
},
{
"epoch": 140.0,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.7126,
"step": 280
},
{
"epoch": 140.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.556534469127655,
"eval_runtime": 0.6392,
"eval_samples_per_second": 109.51,
"eval_steps_per_second": 3.129,
"step": 280
},
{
"epoch": 141.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5563255548477173,
"eval_runtime": 0.8384,
"eval_samples_per_second": 83.488,
"eval_steps_per_second": 2.385,
"step": 282
},
{
"epoch": 142.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5561147928237915,
"eval_runtime": 0.6382,
"eval_samples_per_second": 109.687,
"eval_steps_per_second": 3.134,
"step": 284
},
{
"epoch": 143.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5559044480323792,
"eval_runtime": 0.647,
"eval_samples_per_second": 108.191,
"eval_steps_per_second": 3.091,
"step": 286
},
{
"epoch": 144.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.555549681186676,
"eval_runtime": 0.8257,
"eval_samples_per_second": 84.779,
"eval_steps_per_second": 2.422,
"step": 288
},
{
"epoch": 145.0,
"learning_rate": 3.4375e-05,
"loss": 0.669,
"step": 290
},
{
"epoch": 145.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5551820397377014,
"eval_runtime": 0.6311,
"eval_samples_per_second": 110.921,
"eval_steps_per_second": 3.169,
"step": 290
},
{
"epoch": 146.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.55474454164505,
"eval_runtime": 0.653,
"eval_samples_per_second": 107.193,
"eval_steps_per_second": 3.063,
"step": 292
},
{
"epoch": 147.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5542392134666443,
"eval_runtime": 0.8093,
"eval_samples_per_second": 86.493,
"eval_steps_per_second": 2.471,
"step": 294
},
{
"epoch": 148.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5537976622581482,
"eval_runtime": 0.6472,
"eval_samples_per_second": 108.161,
"eval_steps_per_second": 3.09,
"step": 296
},
{
"epoch": 149.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5534089803695679,
"eval_runtime": 0.6283,
"eval_samples_per_second": 111.403,
"eval_steps_per_second": 3.183,
"step": 298
},
{
"epoch": 150.0,
"learning_rate": 3.125e-05,
"loss": 0.6481,
"step": 300
},
{
"epoch": 150.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5530030727386475,
"eval_runtime": 0.8284,
"eval_samples_per_second": 84.505,
"eval_steps_per_second": 2.414,
"step": 300
},
{
"epoch": 151.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5526387095451355,
"eval_runtime": 0.6358,
"eval_samples_per_second": 110.105,
"eval_steps_per_second": 3.146,
"step": 302
},
{
"epoch": 152.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5522416830062866,
"eval_runtime": 0.6285,
"eval_samples_per_second": 111.384,
"eval_steps_per_second": 3.182,
"step": 304
},
{
"epoch": 153.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5518553853034973,
"eval_runtime": 0.7207,
"eval_samples_per_second": 97.122,
"eval_steps_per_second": 2.775,
"step": 306
},
{
"epoch": 154.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5514690279960632,
"eval_runtime": 0.6359,
"eval_samples_per_second": 110.085,
"eval_steps_per_second": 3.145,
"step": 308
},
{
"epoch": 155.0,
"learning_rate": 2.8125000000000003e-05,
"loss": 0.6211,
"step": 310
},
{
"epoch": 155.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5510378479957581,
"eval_runtime": 0.636,
"eval_samples_per_second": 110.056,
"eval_steps_per_second": 3.144,
"step": 310
},
{
"epoch": 156.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5506120920181274,
"eval_runtime": 0.6297,
"eval_samples_per_second": 111.157,
"eval_steps_per_second": 3.176,
"step": 312
},
{
"epoch": 157.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5502142906188965,
"eval_runtime": 0.6795,
"eval_samples_per_second": 103.02,
"eval_steps_per_second": 2.943,
"step": 314
},
{
"epoch": 158.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5498998761177063,
"eval_runtime": 0.6321,
"eval_samples_per_second": 110.745,
"eval_steps_per_second": 3.164,
"step": 316
},
{
"epoch": 159.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5495581030845642,
"eval_runtime": 0.6392,
"eval_samples_per_second": 109.52,
"eval_steps_per_second": 3.129,
"step": 318
},
{
"epoch": 160.0,
"learning_rate": 2.5e-05,
"loss": 0.6458,
"step": 320
},
{
"epoch": 160.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5492438077926636,
"eval_runtime": 0.8024,
"eval_samples_per_second": 87.237,
"eval_steps_per_second": 2.492,
"step": 320
},
{
"epoch": 161.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5489979982376099,
"eval_runtime": 0.6575,
"eval_samples_per_second": 106.471,
"eval_steps_per_second": 3.042,
"step": 322
},
{
"epoch": 162.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5487762689590454,
"eval_runtime": 0.6515,
"eval_samples_per_second": 107.45,
"eval_steps_per_second": 3.07,
"step": 324
},
{
"epoch": 163.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.548595130443573,
"eval_runtime": 0.8069,
"eval_samples_per_second": 86.752,
"eval_steps_per_second": 2.479,
"step": 326
},
{
"epoch": 164.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5483713746070862,
"eval_runtime": 0.6447,
"eval_samples_per_second": 108.582,
"eval_steps_per_second": 3.102,
"step": 328
},
{
"epoch": 165.0,
"learning_rate": 2.1875e-05,
"loss": 0.6317,
"step": 330
},
{
"epoch": 165.0,
"eval_accuracy": 0.8142857142857143,
"eval_loss": 0.5481104254722595,
"eval_runtime": 0.6486,
"eval_samples_per_second": 107.926,
"eval_steps_per_second": 3.084,
"step": 330
},
{
"epoch": 166.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5478586554527283,
"eval_runtime": 0.8209,
"eval_samples_per_second": 85.268,
"eval_steps_per_second": 2.436,
"step": 332
},
{
"epoch": 167.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5475797653198242,
"eval_runtime": 0.7417,
"eval_samples_per_second": 94.381,
"eval_steps_per_second": 2.697,
"step": 334
},
{
"epoch": 168.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5473471879959106,
"eval_runtime": 0.6501,
"eval_samples_per_second": 107.671,
"eval_steps_per_second": 3.076,
"step": 336
},
{
"epoch": 169.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5471236705780029,
"eval_runtime": 0.7944,
"eval_samples_per_second": 88.115,
"eval_steps_per_second": 2.518,
"step": 338
},
{
"epoch": 170.0,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.6154,
"step": 340
},
{
"epoch": 170.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5469514727592468,
"eval_runtime": 0.6378,
"eval_samples_per_second": 109.76,
"eval_steps_per_second": 3.136,
"step": 340
},
{
"epoch": 171.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5467889308929443,
"eval_runtime": 0.6433,
"eval_samples_per_second": 108.819,
"eval_steps_per_second": 3.109,
"step": 342
},
{
"epoch": 172.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5466357469558716,
"eval_runtime": 0.8146,
"eval_samples_per_second": 85.93,
"eval_steps_per_second": 2.455,
"step": 344
},
{
"epoch": 173.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5464411973953247,
"eval_runtime": 0.6826,
"eval_samples_per_second": 102.554,
"eval_steps_per_second": 2.93,
"step": 346
},
{
"epoch": 174.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5462457537651062,
"eval_runtime": 0.6413,
"eval_samples_per_second": 109.146,
"eval_steps_per_second": 3.118,
"step": 348
},
{
"epoch": 175.0,
"learning_rate": 1.5625e-05,
"loss": 0.6323,
"step": 350
},
{
"epoch": 175.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5460384488105774,
"eval_runtime": 0.8055,
"eval_samples_per_second": 86.906,
"eval_steps_per_second": 2.483,
"step": 350
},
{
"epoch": 176.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.545864999294281,
"eval_runtime": 0.635,
"eval_samples_per_second": 110.23,
"eval_steps_per_second": 3.149,
"step": 352
},
{
"epoch": 177.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.54571932554245,
"eval_runtime": 0.6362,
"eval_samples_per_second": 110.035,
"eval_steps_per_second": 3.144,
"step": 354
},
{
"epoch": 178.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5455992817878723,
"eval_runtime": 0.8155,
"eval_samples_per_second": 85.839,
"eval_steps_per_second": 2.453,
"step": 356
},
{
"epoch": 179.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5454698801040649,
"eval_runtime": 0.6543,
"eval_samples_per_second": 106.992,
"eval_steps_per_second": 3.057,
"step": 358
},
{
"epoch": 180.0,
"learning_rate": 1.25e-05,
"loss": 0.6331,
"step": 360
},
{
"epoch": 180.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5453290343284607,
"eval_runtime": 0.6499,
"eval_samples_per_second": 107.716,
"eval_steps_per_second": 3.078,
"step": 360
},
{
"epoch": 181.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5451884865760803,
"eval_runtime": 0.8265,
"eval_samples_per_second": 84.691,
"eval_steps_per_second": 2.42,
"step": 362
},
{
"epoch": 182.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5450613498687744,
"eval_runtime": 0.6389,
"eval_samples_per_second": 109.556,
"eval_steps_per_second": 3.13,
"step": 364
},
{
"epoch": 183.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5449284911155701,
"eval_runtime": 0.6467,
"eval_samples_per_second": 108.243,
"eval_steps_per_second": 3.093,
"step": 366
},
{
"epoch": 184.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5448177456855774,
"eval_runtime": 0.825,
"eval_samples_per_second": 84.846,
"eval_steps_per_second": 2.424,
"step": 368
},
{
"epoch": 185.0,
"learning_rate": 9.375000000000001e-06,
"loss": 0.6333,
"step": 370
},
{
"epoch": 185.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.544733464717865,
"eval_runtime": 0.6541,
"eval_samples_per_second": 107.012,
"eval_steps_per_second": 3.057,
"step": 370
},
{
"epoch": 186.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5446553230285645,
"eval_runtime": 0.6491,
"eval_samples_per_second": 107.838,
"eval_steps_per_second": 3.081,
"step": 372
},
{
"epoch": 187.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5445802211761475,
"eval_runtime": 0.8184,
"eval_samples_per_second": 85.533,
"eval_steps_per_second": 2.444,
"step": 374
},
{
"epoch": 188.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5445207357406616,
"eval_runtime": 0.6378,
"eval_samples_per_second": 109.754,
"eval_steps_per_second": 3.136,
"step": 376
},
{
"epoch": 189.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5444640517234802,
"eval_runtime": 0.6708,
"eval_samples_per_second": 104.36,
"eval_steps_per_second": 2.982,
"step": 378
},
{
"epoch": 190.0,
"learning_rate": 6.25e-06,
"loss": 0.608,
"step": 380
},
{
"epoch": 190.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.544407069683075,
"eval_runtime": 0.8392,
"eval_samples_per_second": 83.416,
"eval_steps_per_second": 2.383,
"step": 380
},
{
"epoch": 191.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5443536043167114,
"eval_runtime": 0.6405,
"eval_samples_per_second": 109.293,
"eval_steps_per_second": 3.123,
"step": 382
},
{
"epoch": 192.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5443087816238403,
"eval_runtime": 0.6431,
"eval_samples_per_second": 108.85,
"eval_steps_per_second": 3.11,
"step": 384
},
{
"epoch": 193.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5442724823951721,
"eval_runtime": 0.8311,
"eval_samples_per_second": 84.221,
"eval_steps_per_second": 2.406,
"step": 386
},
{
"epoch": 194.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5442416667938232,
"eval_runtime": 0.6416,
"eval_samples_per_second": 109.095,
"eval_steps_per_second": 3.117,
"step": 388
},
{
"epoch": 195.0,
"learning_rate": 3.125e-06,
"loss": 0.6155,
"step": 390
},
{
"epoch": 195.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5442100763320923,
"eval_runtime": 0.6472,
"eval_samples_per_second": 108.158,
"eval_steps_per_second": 3.09,
"step": 390
},
{
"epoch": 196.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5441816449165344,
"eval_runtime": 0.8234,
"eval_samples_per_second": 85.016,
"eval_steps_per_second": 2.429,
"step": 392
},
{
"epoch": 197.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5441582202911377,
"eval_runtime": 0.6411,
"eval_samples_per_second": 109.183,
"eval_steps_per_second": 3.12,
"step": 394
},
{
"epoch": 198.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5441429615020752,
"eval_runtime": 0.6367,
"eval_samples_per_second": 109.941,
"eval_steps_per_second": 3.141,
"step": 396
},
{
"epoch": 199.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5441319346427917,
"eval_runtime": 0.8204,
"eval_samples_per_second": 85.32,
"eval_steps_per_second": 2.438,
"step": 398
},
{
"epoch": 200.0,
"learning_rate": 0.0,
"loss": 0.6272,
"step": 400
},
{
"epoch": 200.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.5441268086433411,
"eval_runtime": 0.646,
"eval_samples_per_second": 108.365,
"eval_steps_per_second": 3.096,
"step": 400
},
{
"epoch": 200.0,
"step": 400,
"total_flos": 2.23710151698432e+18,
"train_loss": 0.6791047298908234,
"train_runtime": 1022.1437,
"train_samples_per_second": 88.05,
"train_steps_per_second": 0.391
}
],
"logging_steps": 10,
"max_steps": 400,
"num_train_epochs": 200,
"save_steps": 500,
"total_flos": 2.23710151698432e+18,
"trial_name": null,
"trial_params": null
}