PeptideGPT_non_hemolytic / trainer_state.json
aayush14's picture
Uploaded PeptideGPT non-hemolytic model
05f67a6 verified
{
"best_metric": 3.9793689250946045,
"best_model_checkpoint": "output_hemo_neg_3/checkpoint-18392",
"epoch": 500.0,
"eval_steps": 500,
"global_step": 19000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.98e-07,
"loss": 5.9415,
"step": 38
},
{
"epoch": 1.0,
"eval_accuracy": 0.30840664711632454,
"eval_loss": 5.606841087341309,
"eval_runtime": 0.5994,
"eval_samples_per_second": 6.673,
"eval_steps_per_second": 1.668,
"step": 38
},
{
"epoch": 2.0,
"learning_rate": 9.959999999999999e-07,
"loss": 5.7302,
"step": 76
},
{
"epoch": 2.0,
"eval_accuracy": 0.32038123167155425,
"eval_loss": 5.426336288452148,
"eval_runtime": 0.6035,
"eval_samples_per_second": 6.628,
"eval_steps_per_second": 1.657,
"step": 76
},
{
"epoch": 3.0,
"learning_rate": 9.94e-07,
"loss": 5.5675,
"step": 114
},
{
"epoch": 3.0,
"eval_accuracy": 0.323069403714565,
"eval_loss": 5.287517070770264,
"eval_runtime": 0.6089,
"eval_samples_per_second": 6.57,
"eval_steps_per_second": 1.642,
"step": 114
},
{
"epoch": 4.0,
"learning_rate": 9.92e-07,
"loss": 5.4594,
"step": 152
},
{
"epoch": 4.0,
"eval_accuracy": 0.3250244379276637,
"eval_loss": 5.205501556396484,
"eval_runtime": 0.6097,
"eval_samples_per_second": 6.56,
"eval_steps_per_second": 1.64,
"step": 152
},
{
"epoch": 5.0,
"learning_rate": 9.9e-07,
"loss": 5.3808,
"step": 190
},
{
"epoch": 5.0,
"eval_accuracy": 0.3296676441837732,
"eval_loss": 5.158883094787598,
"eval_runtime": 0.6099,
"eval_samples_per_second": 6.558,
"eval_steps_per_second": 1.639,
"step": 190
},
{
"epoch": 6.0,
"learning_rate": 9.88e-07,
"loss": 5.3353,
"step": 228
},
{
"epoch": 6.0,
"eval_accuracy": 0.3321114369501466,
"eval_loss": 5.119546413421631,
"eval_runtime": 0.6108,
"eval_samples_per_second": 6.549,
"eval_steps_per_second": 1.637,
"step": 228
},
{
"epoch": 7.0,
"learning_rate": 9.86e-07,
"loss": 5.2946,
"step": 266
},
{
"epoch": 7.0,
"eval_accuracy": 0.333822091886608,
"eval_loss": 5.077916622161865,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 266
},
{
"epoch": 8.0,
"learning_rate": 9.84e-07,
"loss": 5.2632,
"step": 304
},
{
"epoch": 8.0,
"eval_accuracy": 0.33699902248289343,
"eval_loss": 5.043184280395508,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 304
},
{
"epoch": 9.0,
"learning_rate": 9.819999999999999e-07,
"loss": 5.2279,
"step": 342
},
{
"epoch": 9.0,
"eval_accuracy": 0.33724340175953077,
"eval_loss": 5.015382766723633,
"eval_runtime": 0.6122,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.634,
"step": 342
},
{
"epoch": 10.0,
"learning_rate": 9.8e-07,
"loss": 5.1999,
"step": 380
},
{
"epoch": 10.0,
"eval_accuracy": 0.33773216031280545,
"eval_loss": 4.993128299713135,
"eval_runtime": 0.612,
"eval_samples_per_second": 6.535,
"eval_steps_per_second": 1.634,
"step": 380
},
{
"epoch": 11.0,
"learning_rate": 9.78e-07,
"loss": 5.1853,
"step": 418
},
{
"epoch": 11.0,
"eval_accuracy": 0.3399315738025415,
"eval_loss": 4.970090389251709,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 418
},
{
"epoch": 12.0,
"learning_rate": 9.759999999999998e-07,
"loss": 5.1619,
"step": 456
},
{
"epoch": 12.0,
"eval_accuracy": 0.3428641251221896,
"eval_loss": 4.94577693939209,
"eval_runtime": 0.6122,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.634,
"step": 456
},
{
"epoch": 13.0,
"learning_rate": 9.74e-07,
"loss": 5.1395,
"step": 494
},
{
"epoch": 13.0,
"eval_accuracy": 0.34384164222873903,
"eval_loss": 4.927363395690918,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 494
},
{
"epoch": 14.0,
"learning_rate": 9.72e-07,
"loss": 5.1179,
"step": 532
},
{
"epoch": 14.0,
"eval_accuracy": 0.34628543499511244,
"eval_loss": 4.908015251159668,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 532
},
{
"epoch": 15.0,
"learning_rate": 9.7e-07,
"loss": 5.1048,
"step": 570
},
{
"epoch": 15.0,
"eval_accuracy": 0.3465298142717498,
"eval_loss": 4.892131805419922,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 570
},
{
"epoch": 16.0,
"learning_rate": 9.679999999999999e-07,
"loss": 5.0837,
"step": 608
},
{
"epoch": 16.0,
"eval_accuracy": 0.34701857282502446,
"eval_loss": 4.875644683837891,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 608
},
{
"epoch": 17.0,
"learning_rate": 9.66e-07,
"loss": 5.067,
"step": 646
},
{
"epoch": 17.0,
"eval_accuracy": 0.34921798631476053,
"eval_loss": 4.860612869262695,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 646
},
{
"epoch": 18.0,
"learning_rate": 9.64e-07,
"loss": 5.0516,
"step": 684
},
{
"epoch": 18.0,
"eval_accuracy": 0.3506842619745846,
"eval_loss": 4.846870422363281,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 684
},
{
"epoch": 19.0,
"learning_rate": 9.619999999999999e-07,
"loss": 5.0313,
"step": 722
},
{
"epoch": 19.0,
"eval_accuracy": 0.3521505376344086,
"eval_loss": 4.836608409881592,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 722
},
{
"epoch": 20.0,
"learning_rate": 9.6e-07,
"loss": 5.0225,
"step": 760
},
{
"epoch": 20.0,
"eval_accuracy": 0.3526392961876833,
"eval_loss": 4.827553749084473,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 760
},
{
"epoch": 21.0,
"learning_rate": 9.58e-07,
"loss": 5.0068,
"step": 798
},
{
"epoch": 21.0,
"eval_accuracy": 0.3521505376344086,
"eval_loss": 4.817898273468018,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.633,
"step": 798
},
{
"epoch": 22.0,
"learning_rate": 9.559999999999998e-07,
"loss": 4.9942,
"step": 836
},
{
"epoch": 22.0,
"eval_accuracy": 0.3521505376344086,
"eval_loss": 4.805068016052246,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 836
},
{
"epoch": 23.0,
"learning_rate": 9.539999999999999e-07,
"loss": 4.9758,
"step": 874
},
{
"epoch": 23.0,
"eval_accuracy": 0.3526392961876833,
"eval_loss": 4.796260356903076,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 874
},
{
"epoch": 24.0,
"learning_rate": 9.52e-07,
"loss": 4.9605,
"step": 912
},
{
"epoch": 24.0,
"eval_accuracy": 0.35288367546432065,
"eval_loss": 4.78426456451416,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.633,
"step": 912
},
{
"epoch": 25.0,
"learning_rate": 9.499999999999999e-07,
"loss": 4.9525,
"step": 950
},
{
"epoch": 25.0,
"eval_accuracy": 0.353128054740958,
"eval_loss": 4.772826671600342,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 950
},
{
"epoch": 26.0,
"learning_rate": 9.479999999999999e-07,
"loss": 4.9409,
"step": 988
},
{
"epoch": 26.0,
"eval_accuracy": 0.35239491691104596,
"eval_loss": 4.761840343475342,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 988
},
{
"epoch": 27.0,
"learning_rate": 9.459999999999999e-07,
"loss": 4.9328,
"step": 1026
},
{
"epoch": 27.0,
"eval_accuracy": 0.3519061583577713,
"eval_loss": 4.75234317779541,
"eval_runtime": 0.6227,
"eval_samples_per_second": 6.424,
"eval_steps_per_second": 1.606,
"step": 1026
},
{
"epoch": 28.0,
"learning_rate": 9.439999999999999e-07,
"loss": 4.9168,
"step": 1064
},
{
"epoch": 28.0,
"eval_accuracy": 0.3526392961876833,
"eval_loss": 4.744428634643555,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 1064
},
{
"epoch": 29.0,
"learning_rate": 9.419999999999999e-07,
"loss": 4.9057,
"step": 1102
},
{
"epoch": 29.0,
"eval_accuracy": 0.3550830889540567,
"eval_loss": 4.733183860778809,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 1102
},
{
"epoch": 30.0,
"learning_rate": 9.399999999999999e-07,
"loss": 4.8896,
"step": 1140
},
{
"epoch": 30.0,
"eval_accuracy": 0.3560606060606061,
"eval_loss": 4.723690986633301,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 1140
},
{
"epoch": 31.0,
"learning_rate": 9.379999999999998e-07,
"loss": 4.8869,
"step": 1178
},
{
"epoch": 31.0,
"eval_accuracy": 0.35654936461388076,
"eval_loss": 4.715620994567871,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 1178
},
{
"epoch": 32.0,
"learning_rate": 9.36e-07,
"loss": 4.8798,
"step": 1216
},
{
"epoch": 32.0,
"eval_accuracy": 0.3567937438905181,
"eval_loss": 4.709283828735352,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 1216
},
{
"epoch": 33.0,
"learning_rate": 9.34e-07,
"loss": 4.8591,
"step": 1254
},
{
"epoch": 33.0,
"eval_accuracy": 0.3575268817204301,
"eval_loss": 4.702914714813232,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 1254
},
{
"epoch": 34.0,
"learning_rate": 9.32e-07,
"loss": 4.8548,
"step": 1292
},
{
"epoch": 34.0,
"eval_accuracy": 0.35703812316715544,
"eval_loss": 4.694584369659424,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 1292
},
{
"epoch": 35.0,
"learning_rate": 9.3e-07,
"loss": 4.8502,
"step": 1330
},
{
"epoch": 35.0,
"eval_accuracy": 0.35948191593352885,
"eval_loss": 4.687127590179443,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 1330
},
{
"epoch": 36.0,
"learning_rate": 9.28e-07,
"loss": 4.8378,
"step": 1368
},
{
"epoch": 36.0,
"eval_accuracy": 0.35948191593352885,
"eval_loss": 4.680301189422607,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 1368
},
{
"epoch": 37.0,
"learning_rate": 9.26e-07,
"loss": 4.829,
"step": 1406
},
{
"epoch": 37.0,
"eval_accuracy": 0.35997067448680353,
"eval_loss": 4.673268795013428,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 1406
},
{
"epoch": 38.0,
"learning_rate": 9.24e-07,
"loss": 4.8177,
"step": 1444
},
{
"epoch": 38.0,
"eval_accuracy": 0.3602150537634409,
"eval_loss": 4.66432523727417,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 1444
},
{
"epoch": 39.0,
"learning_rate": 9.22e-07,
"loss": 4.809,
"step": 1482
},
{
"epoch": 39.0,
"eval_accuracy": 0.36070381231671556,
"eval_loss": 4.6591081619262695,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 1482
},
{
"epoch": 40.0,
"learning_rate": 9.2e-07,
"loss": 4.8002,
"step": 1520
},
{
"epoch": 40.0,
"eval_accuracy": 0.36070381231671556,
"eval_loss": 4.650698661804199,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 1520
},
{
"epoch": 41.0,
"learning_rate": 9.18e-07,
"loss": 4.7938,
"step": 1558
},
{
"epoch": 41.0,
"eval_accuracy": 0.3614369501466276,
"eval_loss": 4.643824577331543,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 1558
},
{
"epoch": 42.0,
"learning_rate": 9.16e-07,
"loss": 4.7787,
"step": 1596
},
{
"epoch": 42.0,
"eval_accuracy": 0.3616813294232649,
"eval_loss": 4.636685848236084,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 1596
},
{
"epoch": 43.0,
"learning_rate": 9.14e-07,
"loss": 4.7685,
"step": 1634
},
{
"epoch": 43.0,
"eval_accuracy": 0.3629032258064516,
"eval_loss": 4.630648136138916,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 1634
},
{
"epoch": 44.0,
"learning_rate": 9.12e-07,
"loss": 4.762,
"step": 1672
},
{
"epoch": 44.0,
"eval_accuracy": 0.36363636363636365,
"eval_loss": 4.621088981628418,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 1672
},
{
"epoch": 45.0,
"learning_rate": 9.1e-07,
"loss": 4.7487,
"step": 1710
},
{
"epoch": 45.0,
"eval_accuracy": 0.36412512218963833,
"eval_loss": 4.61327600479126,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 1710
},
{
"epoch": 46.0,
"learning_rate": 9.08e-07,
"loss": 4.7451,
"step": 1748
},
{
"epoch": 46.0,
"eval_accuracy": 0.364613880742913,
"eval_loss": 4.605830669403076,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 1748
},
{
"epoch": 47.0,
"learning_rate": 9.06e-07,
"loss": 4.7378,
"step": 1786
},
{
"epoch": 47.0,
"eval_accuracy": 0.3658357771260997,
"eval_loss": 4.600909233093262,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 1786
},
{
"epoch": 48.0,
"learning_rate": 9.039999999999999e-07,
"loss": 4.7281,
"step": 1824
},
{
"epoch": 48.0,
"eval_accuracy": 0.3658357771260997,
"eval_loss": 4.5931782722473145,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 1824
},
{
"epoch": 49.0,
"learning_rate": 9.02e-07,
"loss": 4.7196,
"step": 1862
},
{
"epoch": 49.0,
"eval_accuracy": 0.3655913978494624,
"eval_loss": 4.5888590812683105,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 1862
},
{
"epoch": 50.0,
"learning_rate": 9e-07,
"loss": 4.7091,
"step": 1900
},
{
"epoch": 50.0,
"eval_accuracy": 0.36656891495601174,
"eval_loss": 4.581442356109619,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 1900
},
{
"epoch": 51.0,
"learning_rate": 8.98e-07,
"loss": 4.7032,
"step": 1938
},
{
"epoch": 51.0,
"eval_accuracy": 0.3668132942326491,
"eval_loss": 4.5762939453125,
"eval_runtime": 0.6259,
"eval_samples_per_second": 6.391,
"eval_steps_per_second": 1.598,
"step": 1938
},
{
"epoch": 52.0,
"learning_rate": 8.96e-07,
"loss": 4.6978,
"step": 1976
},
{
"epoch": 52.0,
"eval_accuracy": 0.3668132942326491,
"eval_loss": 4.573066711425781,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.633,
"step": 1976
},
{
"epoch": 53.0,
"learning_rate": 8.939999999999999e-07,
"loss": 4.6908,
"step": 2014
},
{
"epoch": 53.0,
"eval_accuracy": 0.36730205278592376,
"eval_loss": 4.5681657791137695,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 2014
},
{
"epoch": 54.0,
"learning_rate": 8.92e-07,
"loss": 4.6776,
"step": 2052
},
{
"epoch": 54.0,
"eval_accuracy": 0.36730205278592376,
"eval_loss": 4.56380558013916,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 2052
},
{
"epoch": 55.0,
"learning_rate": 8.9e-07,
"loss": 4.6667,
"step": 2090
},
{
"epoch": 55.0,
"eval_accuracy": 0.3680351906158358,
"eval_loss": 4.558794975280762,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 2090
},
{
"epoch": 56.0,
"learning_rate": 8.88e-07,
"loss": 4.6662,
"step": 2128
},
{
"epoch": 56.0,
"eval_accuracy": 0.36852394916911047,
"eval_loss": 4.5535197257995605,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 2128
},
{
"epoch": 57.0,
"learning_rate": 8.86e-07,
"loss": 4.6567,
"step": 2166
},
{
"epoch": 57.0,
"eval_accuracy": 0.36974584555229717,
"eval_loss": 4.549376964569092,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 2166
},
{
"epoch": 58.0,
"learning_rate": 8.839999999999999e-07,
"loss": 4.6492,
"step": 2204
},
{
"epoch": 58.0,
"eval_accuracy": 0.36974584555229717,
"eval_loss": 4.543338298797607,
"eval_runtime": 0.6159,
"eval_samples_per_second": 6.494,
"eval_steps_per_second": 1.624,
"step": 2204
},
{
"epoch": 59.0,
"learning_rate": 8.82e-07,
"loss": 4.6442,
"step": 2242
},
{
"epoch": 59.0,
"eval_accuracy": 0.36974584555229717,
"eval_loss": 4.5420732498168945,
"eval_runtime": 0.6226,
"eval_samples_per_second": 6.424,
"eval_steps_per_second": 1.606,
"step": 2242
},
{
"epoch": 60.0,
"learning_rate": 8.799999999999999e-07,
"loss": 4.632,
"step": 2280
},
{
"epoch": 60.0,
"eval_accuracy": 0.3699902248289345,
"eval_loss": 4.5368475914001465,
"eval_runtime": 0.615,
"eval_samples_per_second": 6.504,
"eval_steps_per_second": 1.626,
"step": 2280
},
{
"epoch": 61.0,
"learning_rate": 8.78e-07,
"loss": 4.6256,
"step": 2318
},
{
"epoch": 61.0,
"eval_accuracy": 0.3704789833822092,
"eval_loss": 4.532083511352539,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 2318
},
{
"epoch": 62.0,
"learning_rate": 8.76e-07,
"loss": 4.6215,
"step": 2356
},
{
"epoch": 62.0,
"eval_accuracy": 0.3699902248289345,
"eval_loss": 4.528621673583984,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 2356
},
{
"epoch": 63.0,
"learning_rate": 8.739999999999999e-07,
"loss": 4.6142,
"step": 2394
},
{
"epoch": 63.0,
"eval_accuracy": 0.37023460410557185,
"eval_loss": 4.524003982543945,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 2394
},
{
"epoch": 64.0,
"learning_rate": 8.72e-07,
"loss": 4.6041,
"step": 2432
},
{
"epoch": 64.0,
"eval_accuracy": 0.3709677419354839,
"eval_loss": 4.519542694091797,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 2432
},
{
"epoch": 65.0,
"learning_rate": 8.699999999999999e-07,
"loss": 4.5984,
"step": 2470
},
{
"epoch": 65.0,
"eval_accuracy": 0.37145650048875856,
"eval_loss": 4.514742851257324,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 2470
},
{
"epoch": 66.0,
"learning_rate": 8.68e-07,
"loss": 4.5919,
"step": 2508
},
{
"epoch": 66.0,
"eval_accuracy": 0.37267839687194526,
"eval_loss": 4.511608600616455,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 2508
},
{
"epoch": 67.0,
"learning_rate": 8.659999999999999e-07,
"loss": 4.5838,
"step": 2546
},
{
"epoch": 67.0,
"eval_accuracy": 0.3724340175953079,
"eval_loss": 4.5069780349731445,
"eval_runtime": 0.6152,
"eval_samples_per_second": 6.502,
"eval_steps_per_second": 1.626,
"step": 2546
},
{
"epoch": 68.0,
"learning_rate": 8.639999999999999e-07,
"loss": 4.5733,
"step": 2584
},
{
"epoch": 68.0,
"eval_accuracy": 0.3724340175953079,
"eval_loss": 4.503517150878906,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 2584
},
{
"epoch": 69.0,
"learning_rate": 8.62e-07,
"loss": 4.5642,
"step": 2622
},
{
"epoch": 69.0,
"eval_accuracy": 0.3721896383186706,
"eval_loss": 4.500667095184326,
"eval_runtime": 0.6248,
"eval_samples_per_second": 6.402,
"eval_steps_per_second": 1.6,
"step": 2622
},
{
"epoch": 70.0,
"learning_rate": 8.599999999999999e-07,
"loss": 4.5607,
"step": 2660
},
{
"epoch": 70.0,
"eval_accuracy": 0.37194525904203324,
"eval_loss": 4.4967780113220215,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 2660
},
{
"epoch": 71.0,
"learning_rate": 8.58e-07,
"loss": 4.5543,
"step": 2698
},
{
"epoch": 71.0,
"eval_accuracy": 0.3729227761485826,
"eval_loss": 4.492751121520996,
"eval_runtime": 0.615,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 2698
},
{
"epoch": 72.0,
"learning_rate": 8.559999999999999e-07,
"loss": 4.5502,
"step": 2736
},
{
"epoch": 72.0,
"eval_accuracy": 0.3729227761485826,
"eval_loss": 4.489741325378418,
"eval_runtime": 0.6243,
"eval_samples_per_second": 6.407,
"eval_steps_per_second": 1.602,
"step": 2736
},
{
"epoch": 73.0,
"learning_rate": 8.539999999999999e-07,
"loss": 4.5505,
"step": 2774
},
{
"epoch": 73.0,
"eval_accuracy": 0.3736559139784946,
"eval_loss": 4.487486839294434,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 2774
},
{
"epoch": 74.0,
"learning_rate": 8.52e-07,
"loss": 4.537,
"step": 2812
},
{
"epoch": 74.0,
"eval_accuracy": 0.37316715542521994,
"eval_loss": 4.483956813812256,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 2812
},
{
"epoch": 75.0,
"learning_rate": 8.499999999999999e-07,
"loss": 4.529,
"step": 2850
},
{
"epoch": 75.0,
"eval_accuracy": 0.374633431085044,
"eval_loss": 4.480215549468994,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 2850
},
{
"epoch": 76.0,
"learning_rate": 8.48e-07,
"loss": 4.5201,
"step": 2888
},
{
"epoch": 76.0,
"eval_accuracy": 0.37487781036168133,
"eval_loss": 4.4763689041137695,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 2888
},
{
"epoch": 77.0,
"learning_rate": 8.459999999999999e-07,
"loss": 4.5176,
"step": 2926
},
{
"epoch": 77.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.472899436950684,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 2926
},
{
"epoch": 78.0,
"learning_rate": 8.439999999999999e-07,
"loss": 4.5087,
"step": 2964
},
{
"epoch": 78.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.4715986251831055,
"eval_runtime": 0.627,
"eval_samples_per_second": 6.379,
"eval_steps_per_second": 1.595,
"step": 2964
},
{
"epoch": 79.0,
"learning_rate": 8.419999999999999e-07,
"loss": 4.504,
"step": 3002
},
{
"epoch": 79.0,
"eval_accuracy": 0.37438905180840665,
"eval_loss": 4.468360900878906,
"eval_runtime": 0.6153,
"eval_samples_per_second": 6.501,
"eval_steps_per_second": 1.625,
"step": 3002
},
{
"epoch": 80.0,
"learning_rate": 8.399999999999999e-07,
"loss": 4.4914,
"step": 3040
},
{
"epoch": 80.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.463363170623779,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 3040
},
{
"epoch": 81.0,
"learning_rate": 8.38e-07,
"loss": 4.4907,
"step": 3078
},
{
"epoch": 81.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.461572170257568,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 3078
},
{
"epoch": 82.0,
"learning_rate": 8.359999999999999e-07,
"loss": 4.483,
"step": 3116
},
{
"epoch": 82.0,
"eval_accuracy": 0.375366568914956,
"eval_loss": 4.45780086517334,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 3116
},
{
"epoch": 83.0,
"learning_rate": 8.34e-07,
"loss": 4.4792,
"step": 3154
},
{
"epoch": 83.0,
"eval_accuracy": 0.3741446725317693,
"eval_loss": 4.454073429107666,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 3154
},
{
"epoch": 84.0,
"learning_rate": 8.319999999999999e-07,
"loss": 4.4705,
"step": 3192
},
{
"epoch": 84.0,
"eval_accuracy": 0.37438905180840665,
"eval_loss": 4.451131820678711,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 3192
},
{
"epoch": 85.0,
"learning_rate": 8.299999999999999e-07,
"loss": 4.4647,
"step": 3230
},
{
"epoch": 85.0,
"eval_accuracy": 0.37487781036168133,
"eval_loss": 4.448835372924805,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 3230
},
{
"epoch": 86.0,
"learning_rate": 8.28e-07,
"loss": 4.4617,
"step": 3268
},
{
"epoch": 86.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.444460391998291,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 3268
},
{
"epoch": 87.0,
"learning_rate": 8.259999999999999e-07,
"loss": 4.453,
"step": 3306
},
{
"epoch": 87.0,
"eval_accuracy": 0.37512218963831867,
"eval_loss": 4.438481330871582,
"eval_runtime": 0.6245,
"eval_samples_per_second": 6.405,
"eval_steps_per_second": 1.601,
"step": 3306
},
{
"epoch": 88.0,
"learning_rate": 8.24e-07,
"loss": 4.4488,
"step": 3344
},
{
"epoch": 88.0,
"eval_accuracy": 0.3763440860215054,
"eval_loss": 4.435319423675537,
"eval_runtime": 0.6159,
"eval_samples_per_second": 6.494,
"eval_steps_per_second": 1.624,
"step": 3344
},
{
"epoch": 89.0,
"learning_rate": 8.219999999999999e-07,
"loss": 4.4424,
"step": 3382
},
{
"epoch": 89.0,
"eval_accuracy": 0.3765884652981427,
"eval_loss": 4.432227611541748,
"eval_runtime": 0.6169,
"eval_samples_per_second": 6.484,
"eval_steps_per_second": 1.621,
"step": 3382
},
{
"epoch": 90.0,
"learning_rate": 8.199999999999999e-07,
"loss": 4.433,
"step": 3420
},
{
"epoch": 90.0,
"eval_accuracy": 0.3765884652981427,
"eval_loss": 4.4299702644348145,
"eval_runtime": 0.6255,
"eval_samples_per_second": 6.395,
"eval_steps_per_second": 1.599,
"step": 3420
},
{
"epoch": 91.0,
"learning_rate": 8.179999999999999e-07,
"loss": 4.4252,
"step": 3458
},
{
"epoch": 91.0,
"eval_accuracy": 0.3763440860215054,
"eval_loss": 4.425891399383545,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 3458
},
{
"epoch": 92.0,
"learning_rate": 8.159999999999999e-07,
"loss": 4.4226,
"step": 3496
},
{
"epoch": 92.0,
"eval_accuracy": 0.37732160312805474,
"eval_loss": 4.421455383300781,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 3496
},
{
"epoch": 93.0,
"learning_rate": 8.14e-07,
"loss": 4.4144,
"step": 3534
},
{
"epoch": 93.0,
"eval_accuracy": 0.3770772238514174,
"eval_loss": 4.41888427734375,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 3534
},
{
"epoch": 94.0,
"learning_rate": 8.12e-07,
"loss": 4.4047,
"step": 3572
},
{
"epoch": 94.0,
"eval_accuracy": 0.3770772238514174,
"eval_loss": 4.416011333465576,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.633,
"step": 3572
},
{
"epoch": 95.0,
"learning_rate": 8.1e-07,
"loss": 4.4071,
"step": 3610
},
{
"epoch": 95.0,
"eval_accuracy": 0.37732160312805474,
"eval_loss": 4.413094997406006,
"eval_runtime": 0.6249,
"eval_samples_per_second": 6.401,
"eval_steps_per_second": 1.6,
"step": 3610
},
{
"epoch": 96.0,
"learning_rate": 8.08e-07,
"loss": 4.3975,
"step": 3648
},
{
"epoch": 96.0,
"eval_accuracy": 0.37732160312805474,
"eval_loss": 4.409505367279053,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.532,
"eval_steps_per_second": 1.633,
"step": 3648
},
{
"epoch": 97.0,
"learning_rate": 8.06e-07,
"loss": 4.3897,
"step": 3686
},
{
"epoch": 97.0,
"eval_accuracy": 0.3770772238514174,
"eval_loss": 4.408539772033691,
"eval_runtime": 1.9183,
"eval_samples_per_second": 2.085,
"eval_steps_per_second": 0.521,
"step": 3686
},
{
"epoch": 98.0,
"learning_rate": 8.04e-07,
"loss": 4.3869,
"step": 3724
},
{
"epoch": 98.0,
"eval_accuracy": 0.3770772238514174,
"eval_loss": 4.405216693878174,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 3724
},
{
"epoch": 99.0,
"learning_rate": 8.02e-07,
"loss": 4.3751,
"step": 3762
},
{
"epoch": 99.0,
"eval_accuracy": 0.37732160312805474,
"eval_loss": 4.402120113372803,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 3762
},
{
"epoch": 100.0,
"learning_rate": 8e-07,
"loss": 4.3698,
"step": 3800
},
{
"epoch": 100.0,
"eval_accuracy": 0.37683284457478006,
"eval_loss": 4.398764610290527,
"eval_runtime": 0.6238,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 1.603,
"step": 3800
},
{
"epoch": 101.0,
"learning_rate": 7.98e-07,
"loss": 4.368,
"step": 3838
},
{
"epoch": 101.0,
"eval_accuracy": 0.37683284457478006,
"eval_loss": 4.394458293914795,
"eval_runtime": 0.6261,
"eval_samples_per_second": 6.388,
"eval_steps_per_second": 1.597,
"step": 3838
},
{
"epoch": 102.0,
"learning_rate": 7.96e-07,
"loss": 4.3643,
"step": 3876
},
{
"epoch": 102.0,
"eval_accuracy": 0.3770772238514174,
"eval_loss": 4.391842842102051,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 3876
},
{
"epoch": 103.0,
"learning_rate": 7.94e-07,
"loss": 4.3552,
"step": 3914
},
{
"epoch": 103.0,
"eval_accuracy": 0.3765884652981427,
"eval_loss": 4.389264106750488,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 3914
},
{
"epoch": 104.0,
"learning_rate": 7.92e-07,
"loss": 4.3478,
"step": 3952
},
{
"epoch": 104.0,
"eval_accuracy": 0.3775659824046921,
"eval_loss": 4.386912822723389,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 3952
},
{
"epoch": 105.0,
"learning_rate": 7.9e-07,
"loss": 4.3438,
"step": 3990
},
{
"epoch": 105.0,
"eval_accuracy": 0.37805474095796676,
"eval_loss": 4.3847856521606445,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 3990
},
{
"epoch": 106.0,
"learning_rate": 7.88e-07,
"loss": 4.3362,
"step": 4028
},
{
"epoch": 106.0,
"eval_accuracy": 0.37732160312805474,
"eval_loss": 4.38198184967041,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 4028
},
{
"epoch": 107.0,
"learning_rate": 7.86e-07,
"loss": 4.3356,
"step": 4066
},
{
"epoch": 107.0,
"eval_accuracy": 0.3778103616813294,
"eval_loss": 4.37683629989624,
"eval_runtime": 0.6124,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 4066
},
{
"epoch": 108.0,
"learning_rate": 7.84e-07,
"loss": 4.3263,
"step": 4104
},
{
"epoch": 108.0,
"eval_accuracy": 0.3775659824046921,
"eval_loss": 4.376446723937988,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 4104
},
{
"epoch": 109.0,
"learning_rate": 7.82e-07,
"loss": 4.3238,
"step": 4142
},
{
"epoch": 109.0,
"eval_accuracy": 0.3778103616813294,
"eval_loss": 4.373225688934326,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 4142
},
{
"epoch": 110.0,
"learning_rate": 7.799999999999999e-07,
"loss": 4.3157,
"step": 4180
},
{
"epoch": 110.0,
"eval_accuracy": 0.37805474095796676,
"eval_loss": 4.369943618774414,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 4180
},
{
"epoch": 111.0,
"learning_rate": 7.78e-07,
"loss": 4.311,
"step": 4218
},
{
"epoch": 111.0,
"eval_accuracy": 0.37805474095796676,
"eval_loss": 4.367816925048828,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 4218
},
{
"epoch": 112.0,
"learning_rate": 7.76e-07,
"loss": 4.3048,
"step": 4256
},
{
"epoch": 112.0,
"eval_accuracy": 0.3787878787878788,
"eval_loss": 4.364564895629883,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 4256
},
{
"epoch": 113.0,
"learning_rate": 7.74e-07,
"loss": 4.2955,
"step": 4294
},
{
"epoch": 113.0,
"eval_accuracy": 0.37927663734115347,
"eval_loss": 4.364035606384277,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 4294
},
{
"epoch": 114.0,
"learning_rate": 7.72e-07,
"loss": 4.2914,
"step": 4332
},
{
"epoch": 114.0,
"eval_accuracy": 0.37927663734115347,
"eval_loss": 4.360426425933838,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 4332
},
{
"epoch": 115.0,
"learning_rate": 7.699999999999999e-07,
"loss": 4.286,
"step": 4370
},
{
"epoch": 115.0,
"eval_accuracy": 0.3790322580645161,
"eval_loss": 4.3580002784729,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 4370
},
{
"epoch": 116.0,
"learning_rate": 7.68e-07,
"loss": 4.2857,
"step": 4408
},
{
"epoch": 116.0,
"eval_accuracy": 0.3790322580645161,
"eval_loss": 4.354123115539551,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 4408
},
{
"epoch": 117.0,
"learning_rate": 7.66e-07,
"loss": 4.2776,
"step": 4446
},
{
"epoch": 117.0,
"eval_accuracy": 0.37927663734115347,
"eval_loss": 4.352733612060547,
"eval_runtime": 0.6122,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.633,
"step": 4446
},
{
"epoch": 118.0,
"learning_rate": 7.64e-07,
"loss": 4.2734,
"step": 4484
},
{
"epoch": 118.0,
"eval_accuracy": 0.38025415444770283,
"eval_loss": 4.348194599151611,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 4484
},
{
"epoch": 119.0,
"learning_rate": 7.62e-07,
"loss": 4.2646,
"step": 4522
},
{
"epoch": 119.0,
"eval_accuracy": 0.3800097751710655,
"eval_loss": 4.346100330352783,
"eval_runtime": 0.7901,
"eval_samples_per_second": 5.062,
"eval_steps_per_second": 1.266,
"step": 4522
},
{
"epoch": 120.0,
"learning_rate": 7.599999999999999e-07,
"loss": 4.2632,
"step": 4560
},
{
"epoch": 120.0,
"eval_accuracy": 0.38025415444770283,
"eval_loss": 4.3445892333984375,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 4560
},
{
"epoch": 121.0,
"learning_rate": 7.58e-07,
"loss": 4.2586,
"step": 4598
},
{
"epoch": 121.0,
"eval_accuracy": 0.3807429130009775,
"eval_loss": 4.340865135192871,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 4598
},
{
"epoch": 122.0,
"learning_rate": 7.559999999999999e-07,
"loss": 4.2564,
"step": 4636
},
{
"epoch": 122.0,
"eval_accuracy": 0.3812316715542522,
"eval_loss": 4.3399505615234375,
"eval_runtime": 0.6124,
"eval_samples_per_second": 6.532,
"eval_steps_per_second": 1.633,
"step": 4636
},
{
"epoch": 123.0,
"learning_rate": 7.54e-07,
"loss": 4.2423,
"step": 4674
},
{
"epoch": 123.0,
"eval_accuracy": 0.3807429130009775,
"eval_loss": 4.335657596588135,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 4674
},
{
"epoch": 124.0,
"learning_rate": 7.52e-07,
"loss": 4.2425,
"step": 4712
},
{
"epoch": 124.0,
"eval_accuracy": 0.3807429130009775,
"eval_loss": 4.3334856033325195,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 4712
},
{
"epoch": 125.0,
"learning_rate": 7.5e-07,
"loss": 4.2367,
"step": 4750
},
{
"epoch": 125.0,
"eval_accuracy": 0.38098729227761485,
"eval_loss": 4.330577373504639,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 4750
},
{
"epoch": 126.0,
"learning_rate": 7.48e-07,
"loss": 4.2301,
"step": 4788
},
{
"epoch": 126.0,
"eval_accuracy": 0.38147605083088953,
"eval_loss": 4.3291544914245605,
"eval_runtime": 0.6122,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.633,
"step": 4788
},
{
"epoch": 127.0,
"learning_rate": 7.459999999999999e-07,
"loss": 4.2286,
"step": 4826
},
{
"epoch": 127.0,
"eval_accuracy": 0.3812316715542522,
"eval_loss": 4.327591419219971,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 4826
},
{
"epoch": 128.0,
"learning_rate": 7.44e-07,
"loss": 4.2184,
"step": 4864
},
{
"epoch": 128.0,
"eval_accuracy": 0.38220918866080156,
"eval_loss": 4.32462215423584,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 4864
},
{
"epoch": 129.0,
"learning_rate": 7.42e-07,
"loss": 4.2156,
"step": 4902
},
{
"epoch": 129.0,
"eval_accuracy": 0.38269794721407624,
"eval_loss": 4.3210039138793945,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 4902
},
{
"epoch": 130.0,
"learning_rate": 7.4e-07,
"loss": 4.2116,
"step": 4940
},
{
"epoch": 130.0,
"eval_accuracy": 0.38343108504398826,
"eval_loss": 4.318737506866455,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 4940
},
{
"epoch": 131.0,
"learning_rate": 7.38e-07,
"loss": 4.2008,
"step": 4978
},
{
"epoch": 131.0,
"eval_accuracy": 0.38343108504398826,
"eval_loss": 4.316496849060059,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 4978
},
{
"epoch": 132.0,
"learning_rate": 7.359999999999999e-07,
"loss": 4.1995,
"step": 5016
},
{
"epoch": 132.0,
"eval_accuracy": 0.38343108504398826,
"eval_loss": 4.3134074211120605,
"eval_runtime": 0.6223,
"eval_samples_per_second": 6.428,
"eval_steps_per_second": 1.607,
"step": 5016
},
{
"epoch": 133.0,
"learning_rate": 7.34e-07,
"loss": 4.19,
"step": 5054
},
{
"epoch": 133.0,
"eval_accuracy": 0.3841642228739003,
"eval_loss": 4.313587665557861,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 5054
},
{
"epoch": 134.0,
"learning_rate": 7.319999999999999e-07,
"loss": 4.1828,
"step": 5092
},
{
"epoch": 134.0,
"eval_accuracy": 0.3841642228739003,
"eval_loss": 4.311624050140381,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 5092
},
{
"epoch": 135.0,
"learning_rate": 7.3e-07,
"loss": 4.1815,
"step": 5130
},
{
"epoch": 135.0,
"eval_accuracy": 0.38465298142717497,
"eval_loss": 4.306524276733398,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 5130
},
{
"epoch": 136.0,
"learning_rate": 7.28e-07,
"loss": 4.1771,
"step": 5168
},
{
"epoch": 136.0,
"eval_accuracy": 0.38391984359726294,
"eval_loss": 4.305095195770264,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 5168
},
{
"epoch": 137.0,
"learning_rate": 7.259999999999999e-07,
"loss": 4.1744,
"step": 5206
},
{
"epoch": 137.0,
"eval_accuracy": 0.38465298142717497,
"eval_loss": 4.301632881164551,
"eval_runtime": 1.3483,
"eval_samples_per_second": 2.967,
"eval_steps_per_second": 0.742,
"step": 5206
},
{
"epoch": 138.0,
"learning_rate": 7.24e-07,
"loss": 4.1717,
"step": 5244
},
{
"epoch": 138.0,
"eval_accuracy": 0.38465298142717497,
"eval_loss": 4.297549247741699,
"eval_runtime": 0.6216,
"eval_samples_per_second": 6.435,
"eval_steps_per_second": 1.609,
"step": 5244
},
{
"epoch": 139.0,
"learning_rate": 7.219999999999999e-07,
"loss": 4.1616,
"step": 5282
},
{
"epoch": 139.0,
"eval_accuracy": 0.38465298142717497,
"eval_loss": 4.296638488769531,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 5282
},
{
"epoch": 140.0,
"learning_rate": 7.2e-07,
"loss": 4.1582,
"step": 5320
},
{
"epoch": 140.0,
"eval_accuracy": 0.38465298142717497,
"eval_loss": 4.29475212097168,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 5320
},
{
"epoch": 141.0,
"learning_rate": 7.179999999999999e-07,
"loss": 4.1583,
"step": 5358
},
{
"epoch": 141.0,
"eval_accuracy": 0.3848973607038123,
"eval_loss": 4.293056488037109,
"eval_runtime": 3.2385,
"eval_samples_per_second": 1.235,
"eval_steps_per_second": 0.309,
"step": 5358
},
{
"epoch": 142.0,
"learning_rate": 7.159999999999999e-07,
"loss": 4.148,
"step": 5396
},
{
"epoch": 142.0,
"eval_accuracy": 0.385386119257087,
"eval_loss": 4.289401054382324,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 5396
},
{
"epoch": 143.0,
"learning_rate": 7.14e-07,
"loss": 4.1417,
"step": 5434
},
{
"epoch": 143.0,
"eval_accuracy": 0.3848973607038123,
"eval_loss": 4.286114692687988,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 5434
},
{
"epoch": 144.0,
"learning_rate": 7.119999999999999e-07,
"loss": 4.1386,
"step": 5472
},
{
"epoch": 144.0,
"eval_accuracy": 0.386119257086999,
"eval_loss": 4.286536693572998,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 5472
},
{
"epoch": 145.0,
"learning_rate": 7.1e-07,
"loss": 4.133,
"step": 5510
},
{
"epoch": 145.0,
"eval_accuracy": 0.386119257086999,
"eval_loss": 4.283446311950684,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 5510
},
{
"epoch": 146.0,
"learning_rate": 7.079999999999999e-07,
"loss": 4.129,
"step": 5548
},
{
"epoch": 146.0,
"eval_accuracy": 0.38636363636363635,
"eval_loss": 4.279318332672119,
"eval_runtime": 0.6163,
"eval_samples_per_second": 6.491,
"eval_steps_per_second": 1.623,
"step": 5548
},
{
"epoch": 147.0,
"learning_rate": 7.059999999999999e-07,
"loss": 4.12,
"step": 5586
},
{
"epoch": 147.0,
"eval_accuracy": 0.386119257086999,
"eval_loss": 4.278520584106445,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 5586
},
{
"epoch": 148.0,
"learning_rate": 7.04e-07,
"loss": 4.1206,
"step": 5624
},
{
"epoch": 148.0,
"eval_accuracy": 0.38636363636363635,
"eval_loss": 4.274984836578369,
"eval_runtime": 0.6214,
"eval_samples_per_second": 6.437,
"eval_steps_per_second": 1.609,
"step": 5624
},
{
"epoch": 149.0,
"learning_rate": 7.019999999999999e-07,
"loss": 4.1226,
"step": 5662
},
{
"epoch": 149.0,
"eval_accuracy": 0.3870967741935484,
"eval_loss": 4.274369716644287,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 5662
},
{
"epoch": 150.0,
"learning_rate": 7e-07,
"loss": 4.1104,
"step": 5700
},
{
"epoch": 150.0,
"eval_accuracy": 0.3866080156402737,
"eval_loss": 4.272345066070557,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 5700
},
{
"epoch": 151.0,
"learning_rate": 6.979999999999999e-07,
"loss": 4.1093,
"step": 5738
},
{
"epoch": 151.0,
"eval_accuracy": 0.3870967741935484,
"eval_loss": 4.267661094665527,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 5738
},
{
"epoch": 152.0,
"learning_rate": 6.959999999999999e-07,
"loss": 4.0989,
"step": 5776
},
{
"epoch": 152.0,
"eval_accuracy": 0.38685239491691104,
"eval_loss": 4.265379428863525,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 5776
},
{
"epoch": 153.0,
"learning_rate": 6.939999999999999e-07,
"loss": 4.1035,
"step": 5814
},
{
"epoch": 153.0,
"eval_accuracy": 0.3878299120234604,
"eval_loss": 4.264577865600586,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 5814
},
{
"epoch": 154.0,
"learning_rate": 6.919999999999999e-07,
"loss": 4.0949,
"step": 5852
},
{
"epoch": 154.0,
"eval_accuracy": 0.38807429130009774,
"eval_loss": 4.263481616973877,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 5852
},
{
"epoch": 155.0,
"learning_rate": 6.9e-07,
"loss": 4.0921,
"step": 5890
},
{
"epoch": 155.0,
"eval_accuracy": 0.3883186705767351,
"eval_loss": 4.260597229003906,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 5890
},
{
"epoch": 156.0,
"learning_rate": 6.879999999999999e-07,
"loss": 4.0883,
"step": 5928
},
{
"epoch": 156.0,
"eval_accuracy": 0.3885630498533724,
"eval_loss": 4.256484508514404,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 5928
},
{
"epoch": 157.0,
"learning_rate": 6.86e-07,
"loss": 4.0794,
"step": 5966
},
{
"epoch": 157.0,
"eval_accuracy": 0.38929618768328444,
"eval_loss": 4.25582218170166,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 5966
},
{
"epoch": 158.0,
"learning_rate": 6.84e-07,
"loss": 4.0754,
"step": 6004
},
{
"epoch": 158.0,
"eval_accuracy": 0.38880742913000976,
"eval_loss": 4.2530412673950195,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 6004
},
{
"epoch": 159.0,
"learning_rate": 6.82e-07,
"loss": 4.0756,
"step": 6042
},
{
"epoch": 159.0,
"eval_accuracy": 0.38929618768328444,
"eval_loss": 4.249640464782715,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 6042
},
{
"epoch": 160.0,
"learning_rate": 6.800000000000001e-07,
"loss": 4.067,
"step": 6080
},
{
"epoch": 160.0,
"eval_accuracy": 0.38880742913000976,
"eval_loss": 4.250114917755127,
"eval_runtime": 0.6115,
"eval_samples_per_second": 6.541,
"eval_steps_per_second": 1.635,
"step": 6080
},
{
"epoch": 161.0,
"learning_rate": 6.78e-07,
"loss": 4.0627,
"step": 6118
},
{
"epoch": 161.0,
"eval_accuracy": 0.3890518084066471,
"eval_loss": 4.24841833114624,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 6118
},
{
"epoch": 162.0,
"learning_rate": 6.76e-07,
"loss": 4.0586,
"step": 6156
},
{
"epoch": 162.0,
"eval_accuracy": 0.3897849462365591,
"eval_loss": 4.243945121765137,
"eval_runtime": 0.6246,
"eval_samples_per_second": 6.404,
"eval_steps_per_second": 1.601,
"step": 6156
},
{
"epoch": 163.0,
"learning_rate": 6.74e-07,
"loss": 4.0577,
"step": 6194
},
{
"epoch": 163.0,
"eval_accuracy": 0.38929618768328444,
"eval_loss": 4.243143081665039,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 6194
},
{
"epoch": 164.0,
"learning_rate": 6.72e-07,
"loss": 4.055,
"step": 6232
},
{
"epoch": 164.0,
"eval_accuracy": 0.3895405669599218,
"eval_loss": 4.239078044891357,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 6232
},
{
"epoch": 165.0,
"learning_rate": 6.7e-07,
"loss": 4.0419,
"step": 6270
},
{
"epoch": 165.0,
"eval_accuracy": 0.3895405669599218,
"eval_loss": 4.239559650421143,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 6270
},
{
"epoch": 166.0,
"learning_rate": 6.68e-07,
"loss": 4.0411,
"step": 6308
},
{
"epoch": 166.0,
"eval_accuracy": 0.3902737047898338,
"eval_loss": 4.236454486846924,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 6308
},
{
"epoch": 167.0,
"learning_rate": 6.66e-07,
"loss": 4.0405,
"step": 6346
},
{
"epoch": 167.0,
"eval_accuracy": 0.3907624633431085,
"eval_loss": 4.235616683959961,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 6346
},
{
"epoch": 168.0,
"learning_rate": 6.64e-07,
"loss": 4.0327,
"step": 6384
},
{
"epoch": 168.0,
"eval_accuracy": 0.39051808406647115,
"eval_loss": 4.234899044036865,
"eval_runtime": 0.6121,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.634,
"step": 6384
},
{
"epoch": 169.0,
"learning_rate": 6.62e-07,
"loss": 4.0262,
"step": 6422
},
{
"epoch": 169.0,
"eval_accuracy": 0.3912512218963832,
"eval_loss": 4.231151580810547,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.626,
"step": 6422
},
{
"epoch": 170.0,
"learning_rate": 6.6e-07,
"loss": 4.0252,
"step": 6460
},
{
"epoch": 170.0,
"eval_accuracy": 0.3912512218963832,
"eval_loss": 4.230025291442871,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 6460
},
{
"epoch": 171.0,
"learning_rate": 6.58e-07,
"loss": 4.0237,
"step": 6498
},
{
"epoch": 171.0,
"eval_accuracy": 0.3914956011730205,
"eval_loss": 4.225388526916504,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 6498
},
{
"epoch": 172.0,
"learning_rate": 6.56e-07,
"loss": 4.024,
"step": 6536
},
{
"epoch": 172.0,
"eval_accuracy": 0.3919843597262952,
"eval_loss": 4.224780082702637,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 6536
},
{
"epoch": 173.0,
"learning_rate": 6.54e-07,
"loss": 4.0137,
"step": 6574
},
{
"epoch": 173.0,
"eval_accuracy": 0.39222873900293254,
"eval_loss": 4.221837997436523,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 6574
},
{
"epoch": 174.0,
"learning_rate": 6.52e-07,
"loss": 4.0108,
"step": 6612
},
{
"epoch": 174.0,
"eval_accuracy": 0.3927174975562072,
"eval_loss": 4.222439765930176,
"eval_runtime": 0.6168,
"eval_samples_per_second": 6.485,
"eval_steps_per_second": 1.621,
"step": 6612
},
{
"epoch": 175.0,
"learning_rate": 6.5e-07,
"loss": 4.0037,
"step": 6650
},
{
"epoch": 175.0,
"eval_accuracy": 0.3939393939393939,
"eval_loss": 4.219006538391113,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 6650
},
{
"epoch": 176.0,
"learning_rate": 6.48e-07,
"loss": 4.0021,
"step": 6688
},
{
"epoch": 176.0,
"eval_accuracy": 0.3936950146627566,
"eval_loss": 4.218034267425537,
"eval_runtime": 0.6156,
"eval_samples_per_second": 6.498,
"eval_steps_per_second": 1.625,
"step": 6688
},
{
"epoch": 177.0,
"learning_rate": 6.46e-07,
"loss": 3.9949,
"step": 6726
},
{
"epoch": 177.0,
"eval_accuracy": 0.39418377321603126,
"eval_loss": 4.215020656585693,
"eval_runtime": 0.6221,
"eval_samples_per_second": 6.43,
"eval_steps_per_second": 1.607,
"step": 6726
},
{
"epoch": 178.0,
"learning_rate": 6.44e-07,
"loss": 3.9957,
"step": 6764
},
{
"epoch": 178.0,
"eval_accuracy": 0.3939393939393939,
"eval_loss": 4.213464260101318,
"eval_runtime": 0.6127,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 6764
},
{
"epoch": 179.0,
"learning_rate": 6.42e-07,
"loss": 3.9923,
"step": 6802
},
{
"epoch": 179.0,
"eval_accuracy": 0.39418377321603126,
"eval_loss": 4.209378242492676,
"eval_runtime": 0.6122,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.634,
"step": 6802
},
{
"epoch": 180.0,
"learning_rate": 6.4e-07,
"loss": 3.9853,
"step": 6840
},
{
"epoch": 180.0,
"eval_accuracy": 0.3949169110459433,
"eval_loss": 4.209150314331055,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 6840
},
{
"epoch": 181.0,
"learning_rate": 6.38e-07,
"loss": 3.9779,
"step": 6878
},
{
"epoch": 181.0,
"eval_accuracy": 0.3949169110459433,
"eval_loss": 4.2085700035095215,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 6878
},
{
"epoch": 182.0,
"learning_rate": 6.36e-07,
"loss": 3.9826,
"step": 6916
},
{
"epoch": 182.0,
"eval_accuracy": 0.39467253176930595,
"eval_loss": 4.204543590545654,
"eval_runtime": 0.6126,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 6916
},
{
"epoch": 183.0,
"learning_rate": 6.34e-07,
"loss": 3.9775,
"step": 6954
},
{
"epoch": 183.0,
"eval_accuracy": 0.3949169110459433,
"eval_loss": 4.201192855834961,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 6954
},
{
"epoch": 184.0,
"learning_rate": 6.319999999999999e-07,
"loss": 3.9706,
"step": 6992
},
{
"epoch": 184.0,
"eval_accuracy": 0.39613880742913,
"eval_loss": 4.200508117675781,
"eval_runtime": 0.6124,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 6992
},
{
"epoch": 185.0,
"learning_rate": 6.3e-07,
"loss": 3.9672,
"step": 7030
},
{
"epoch": 185.0,
"eval_accuracy": 0.3956500488758553,
"eval_loss": 4.19916296005249,
"eval_runtime": 0.6242,
"eval_samples_per_second": 6.408,
"eval_steps_per_second": 1.602,
"step": 7030
},
{
"epoch": 186.0,
"learning_rate": 6.28e-07,
"loss": 3.9707,
"step": 7068
},
{
"epoch": 186.0,
"eval_accuracy": 0.3966275659824047,
"eval_loss": 4.196375370025635,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 7068
},
{
"epoch": 187.0,
"learning_rate": 6.26e-07,
"loss": 3.9585,
"step": 7106
},
{
"epoch": 187.0,
"eval_accuracy": 0.39711632453567935,
"eval_loss": 4.195079326629639,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 7106
},
{
"epoch": 188.0,
"learning_rate": 6.24e-07,
"loss": 3.9552,
"step": 7144
},
{
"epoch": 188.0,
"eval_accuracy": 0.3966275659824047,
"eval_loss": 4.192666530609131,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 7144
},
{
"epoch": 189.0,
"learning_rate": 6.219999999999999e-07,
"loss": 3.9526,
"step": 7182
},
{
"epoch": 189.0,
"eval_accuracy": 0.3966275659824047,
"eval_loss": 4.1922197341918945,
"eval_runtime": 0.6118,
"eval_samples_per_second": 6.538,
"eval_steps_per_second": 1.635,
"step": 7182
},
{
"epoch": 190.0,
"learning_rate": 6.2e-07,
"loss": 3.9514,
"step": 7220
},
{
"epoch": 190.0,
"eval_accuracy": 0.396871945259042,
"eval_loss": 4.18861722946167,
"eval_runtime": 0.6118,
"eval_samples_per_second": 6.538,
"eval_steps_per_second": 1.635,
"step": 7220
},
{
"epoch": 191.0,
"learning_rate": 6.18e-07,
"loss": 3.9464,
"step": 7258
},
{
"epoch": 191.0,
"eval_accuracy": 0.39760508308895404,
"eval_loss": 4.188557147979736,
"eval_runtime": 0.667,
"eval_samples_per_second": 5.997,
"eval_steps_per_second": 1.499,
"step": 7258
},
{
"epoch": 192.0,
"learning_rate": 6.16e-07,
"loss": 3.9433,
"step": 7296
},
{
"epoch": 192.0,
"eval_accuracy": 0.3980938416422287,
"eval_loss": 4.185554504394531,
"eval_runtime": 0.6187,
"eval_samples_per_second": 6.466,
"eval_steps_per_second": 1.616,
"step": 7296
},
{
"epoch": 193.0,
"learning_rate": 6.14e-07,
"loss": 3.9378,
"step": 7334
},
{
"epoch": 193.0,
"eval_accuracy": 0.3978494623655914,
"eval_loss": 4.184579372406006,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 7334
},
{
"epoch": 194.0,
"learning_rate": 6.119999999999999e-07,
"loss": 3.9362,
"step": 7372
},
{
"epoch": 194.0,
"eval_accuracy": 0.3980938416422287,
"eval_loss": 4.1830949783325195,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 7372
},
{
"epoch": 195.0,
"learning_rate": 6.1e-07,
"loss": 3.9307,
"step": 7410
},
{
"epoch": 195.0,
"eval_accuracy": 0.3980938416422287,
"eval_loss": 4.182034969329834,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 7410
},
{
"epoch": 196.0,
"learning_rate": 6.079999999999999e-07,
"loss": 3.9324,
"step": 7448
},
{
"epoch": 196.0,
"eval_accuracy": 0.3978494623655914,
"eval_loss": 4.176692485809326,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 7448
},
{
"epoch": 197.0,
"learning_rate": 6.06e-07,
"loss": 3.9223,
"step": 7486
},
{
"epoch": 197.0,
"eval_accuracy": 0.39833822091886606,
"eval_loss": 4.179370403289795,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 7486
},
{
"epoch": 198.0,
"learning_rate": 6.04e-07,
"loss": 3.9279,
"step": 7524
},
{
"epoch": 198.0,
"eval_accuracy": 0.3985826001955034,
"eval_loss": 4.1752119064331055,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 7524
},
{
"epoch": 199.0,
"learning_rate": 6.019999999999999e-07,
"loss": 3.9214,
"step": 7562
},
{
"epoch": 199.0,
"eval_accuracy": 0.3980938416422287,
"eval_loss": 4.172707557678223,
"eval_runtime": 0.6174,
"eval_samples_per_second": 6.479,
"eval_steps_per_second": 1.62,
"step": 7562
},
{
"epoch": 200.0,
"learning_rate": 6e-07,
"loss": 3.9122,
"step": 7600
},
{
"epoch": 200.0,
"eval_accuracy": 0.39882697947214074,
"eval_loss": 4.174560070037842,
"eval_runtime": 0.7746,
"eval_samples_per_second": 5.164,
"eval_steps_per_second": 1.291,
"step": 7600
},
{
"epoch": 201.0,
"learning_rate": 5.979999999999999e-07,
"loss": 3.9099,
"step": 7638
},
{
"epoch": 201.0,
"eval_accuracy": 0.39956011730205276,
"eval_loss": 4.169778823852539,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 7638
},
{
"epoch": 202.0,
"learning_rate": 5.96e-07,
"loss": 3.9075,
"step": 7676
},
{
"epoch": 202.0,
"eval_accuracy": 0.3993157380254154,
"eval_loss": 4.169203758239746,
"eval_runtime": 0.6199,
"eval_samples_per_second": 6.452,
"eval_steps_per_second": 1.613,
"step": 7676
},
{
"epoch": 203.0,
"learning_rate": 5.939999999999999e-07,
"loss": 3.9095,
"step": 7714
},
{
"epoch": 203.0,
"eval_accuracy": 0.40004887585532745,
"eval_loss": 4.16612434387207,
"eval_runtime": 0.615,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 7714
},
{
"epoch": 204.0,
"learning_rate": 5.919999999999999e-07,
"loss": 3.9,
"step": 7752
},
{
"epoch": 204.0,
"eval_accuracy": 0.40078201368523947,
"eval_loss": 4.163661956787109,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 7752
},
{
"epoch": 205.0,
"learning_rate": 5.9e-07,
"loss": 3.9004,
"step": 7790
},
{
"epoch": 205.0,
"eval_accuracy": 0.4002932551319648,
"eval_loss": 4.161859512329102,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 7790
},
{
"epoch": 206.0,
"learning_rate": 5.879999999999999e-07,
"loss": 3.8978,
"step": 7828
},
{
"epoch": 206.0,
"eval_accuracy": 0.40053763440860213,
"eval_loss": 4.160345554351807,
"eval_runtime": 0.6636,
"eval_samples_per_second": 6.028,
"eval_steps_per_second": 1.507,
"step": 7828
},
{
"epoch": 207.0,
"learning_rate": 5.86e-07,
"loss": 3.8918,
"step": 7866
},
{
"epoch": 207.0,
"eval_accuracy": 0.40053763440860213,
"eval_loss": 4.158294677734375,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 7866
},
{
"epoch": 208.0,
"learning_rate": 5.839999999999999e-07,
"loss": 3.8848,
"step": 7904
},
{
"epoch": 208.0,
"eval_accuracy": 0.40078201368523947,
"eval_loss": 4.158019542694092,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 7904
},
{
"epoch": 209.0,
"learning_rate": 5.819999999999999e-07,
"loss": 3.8831,
"step": 7942
},
{
"epoch": 209.0,
"eval_accuracy": 0.40004887585532745,
"eval_loss": 4.1576619148254395,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 7942
},
{
"epoch": 210.0,
"learning_rate": 5.8e-07,
"loss": 3.8821,
"step": 7980
},
{
"epoch": 210.0,
"eval_accuracy": 0.40053763440860213,
"eval_loss": 4.154994487762451,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 7980
},
{
"epoch": 211.0,
"learning_rate": 5.779999999999999e-07,
"loss": 3.8818,
"step": 8018
},
{
"epoch": 211.0,
"eval_accuracy": 0.40078201368523947,
"eval_loss": 4.152185440063477,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 8018
},
{
"epoch": 212.0,
"learning_rate": 5.76e-07,
"loss": 3.8764,
"step": 8056
},
{
"epoch": 212.0,
"eval_accuracy": 0.40078201368523947,
"eval_loss": 4.152061462402344,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 8056
},
{
"epoch": 213.0,
"learning_rate": 5.739999999999999e-07,
"loss": 3.8704,
"step": 8094
},
{
"epoch": 213.0,
"eval_accuracy": 0.4010263929618768,
"eval_loss": 4.14907693862915,
"eval_runtime": 0.6221,
"eval_samples_per_second": 6.43,
"eval_steps_per_second": 1.607,
"step": 8094
},
{
"epoch": 214.0,
"learning_rate": 5.719999999999999e-07,
"loss": 3.8725,
"step": 8132
},
{
"epoch": 214.0,
"eval_accuracy": 0.4010263929618768,
"eval_loss": 4.149218559265137,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 8132
},
{
"epoch": 215.0,
"learning_rate": 5.699999999999999e-07,
"loss": 3.8698,
"step": 8170
},
{
"epoch": 215.0,
"eval_accuracy": 0.4010263929618768,
"eval_loss": 4.146964073181152,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 8170
},
{
"epoch": 216.0,
"learning_rate": 5.679999999999999e-07,
"loss": 3.8654,
"step": 8208
},
{
"epoch": 216.0,
"eval_accuracy": 0.40175953079178883,
"eval_loss": 4.146454811096191,
"eval_runtime": 0.6121,
"eval_samples_per_second": 6.535,
"eval_steps_per_second": 1.634,
"step": 8208
},
{
"epoch": 217.0,
"learning_rate": 5.66e-07,
"loss": 3.8608,
"step": 8246
},
{
"epoch": 217.0,
"eval_accuracy": 0.4020039100684262,
"eval_loss": 4.145140171051025,
"eval_runtime": 0.6127,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 8246
},
{
"epoch": 218.0,
"learning_rate": 5.639999999999999e-07,
"loss": 3.8584,
"step": 8284
},
{
"epoch": 218.0,
"eval_accuracy": 0.4015151515151515,
"eval_loss": 4.142205715179443,
"eval_runtime": 0.6251,
"eval_samples_per_second": 6.399,
"eval_steps_per_second": 1.6,
"step": 8284
},
{
"epoch": 219.0,
"learning_rate": 5.620000000000001e-07,
"loss": 3.8546,
"step": 8322
},
{
"epoch": 219.0,
"eval_accuracy": 0.40249266862170086,
"eval_loss": 4.1411662101745605,
"eval_runtime": 0.6119,
"eval_samples_per_second": 6.537,
"eval_steps_per_second": 1.634,
"step": 8322
},
{
"epoch": 220.0,
"learning_rate": 5.6e-07,
"loss": 3.8494,
"step": 8360
},
{
"epoch": 220.0,
"eval_accuracy": 0.4022482893450635,
"eval_loss": 4.140811920166016,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 8360
},
{
"epoch": 221.0,
"learning_rate": 5.58e-07,
"loss": 3.8479,
"step": 8398
},
{
"epoch": 221.0,
"eval_accuracy": 0.40249266862170086,
"eval_loss": 4.13836145401001,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 8398
},
{
"epoch": 222.0,
"learning_rate": 5.560000000000001e-07,
"loss": 3.8463,
"step": 8436
},
{
"epoch": 222.0,
"eval_accuracy": 0.40249266862170086,
"eval_loss": 4.136462688446045,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 8436
},
{
"epoch": 223.0,
"learning_rate": 5.54e-07,
"loss": 3.8422,
"step": 8474
},
{
"epoch": 223.0,
"eval_accuracy": 0.40298142717497554,
"eval_loss": 4.1326165199279785,
"eval_runtime": 0.6246,
"eval_samples_per_second": 6.404,
"eval_steps_per_second": 1.601,
"step": 8474
},
{
"epoch": 224.0,
"learning_rate": 5.520000000000001e-07,
"loss": 3.8395,
"step": 8512
},
{
"epoch": 224.0,
"eval_accuracy": 0.4022482893450635,
"eval_loss": 4.133283615112305,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 8512
},
{
"epoch": 225.0,
"learning_rate": 5.5e-07,
"loss": 3.8369,
"step": 8550
},
{
"epoch": 225.0,
"eval_accuracy": 0.4034701857282502,
"eval_loss": 4.133824825286865,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 8550
},
{
"epoch": 226.0,
"learning_rate": 5.48e-07,
"loss": 3.8357,
"step": 8588
},
{
"epoch": 226.0,
"eval_accuracy": 0.4046920821114369,
"eval_loss": 4.129902362823486,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 8588
},
{
"epoch": 227.0,
"learning_rate": 5.46e-07,
"loss": 3.8318,
"step": 8626
},
{
"epoch": 227.0,
"eval_accuracy": 0.40420332355816224,
"eval_loss": 4.129788398742676,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 8626
},
{
"epoch": 228.0,
"learning_rate": 5.44e-07,
"loss": 3.8258,
"step": 8664
},
{
"epoch": 228.0,
"eval_accuracy": 0.4039589442815249,
"eval_loss": 4.129807472229004,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 8664
},
{
"epoch": 229.0,
"learning_rate": 5.420000000000001e-07,
"loss": 3.8265,
"step": 8702
},
{
"epoch": 229.0,
"eval_accuracy": 0.4044477028347996,
"eval_loss": 4.127597332000732,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 8702
},
{
"epoch": 230.0,
"learning_rate": 5.4e-07,
"loss": 3.8229,
"step": 8740
},
{
"epoch": 230.0,
"eval_accuracy": 0.40420332355816224,
"eval_loss": 4.126589298248291,
"eval_runtime": 0.6331,
"eval_samples_per_second": 6.318,
"eval_steps_per_second": 1.58,
"step": 8740
},
{
"epoch": 231.0,
"learning_rate": 5.38e-07,
"loss": 3.8139,
"step": 8778
},
{
"epoch": 231.0,
"eval_accuracy": 0.40420332355816224,
"eval_loss": 4.125330448150635,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 8778
},
{
"epoch": 232.0,
"learning_rate": 5.36e-07,
"loss": 3.8132,
"step": 8816
},
{
"epoch": 232.0,
"eval_accuracy": 0.4046920821114369,
"eval_loss": 4.1250810623168945,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 8816
},
{
"epoch": 233.0,
"learning_rate": 5.34e-07,
"loss": 3.8126,
"step": 8854
},
{
"epoch": 233.0,
"eval_accuracy": 0.4046920821114369,
"eval_loss": 4.122879505157471,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 8854
},
{
"epoch": 234.0,
"learning_rate": 5.32e-07,
"loss": 3.8074,
"step": 8892
},
{
"epoch": 234.0,
"eval_accuracy": 0.40640273704789837,
"eval_loss": 4.121622085571289,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 8892
},
{
"epoch": 235.0,
"learning_rate": 5.3e-07,
"loss": 3.8072,
"step": 8930
},
{
"epoch": 235.0,
"eval_accuracy": 0.4066471163245357,
"eval_loss": 4.121754169464111,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 8930
},
{
"epoch": 236.0,
"learning_rate": 5.28e-07,
"loss": 3.8056,
"step": 8968
},
{
"epoch": 236.0,
"eval_accuracy": 0.4066471163245357,
"eval_loss": 4.116854667663574,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 8968
},
{
"epoch": 237.0,
"learning_rate": 5.26e-07,
"loss": 3.8038,
"step": 9006
},
{
"epoch": 237.0,
"eval_accuracy": 0.4066471163245357,
"eval_loss": 4.116855621337891,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 9006
},
{
"epoch": 238.0,
"learning_rate": 5.24e-07,
"loss": 3.8025,
"step": 9044
},
{
"epoch": 238.0,
"eval_accuracy": 0.4066471163245357,
"eval_loss": 4.115084648132324,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 9044
},
{
"epoch": 239.0,
"learning_rate": 5.22e-07,
"loss": 3.7948,
"step": 9082
},
{
"epoch": 239.0,
"eval_accuracy": 0.40689149560117305,
"eval_loss": 4.11461877822876,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 9082
},
{
"epoch": 240.0,
"learning_rate": 5.2e-07,
"loss": 3.7929,
"step": 9120
},
{
"epoch": 240.0,
"eval_accuracy": 0.4066471163245357,
"eval_loss": 4.1119794845581055,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 9120
},
{
"epoch": 241.0,
"learning_rate": 5.18e-07,
"loss": 3.7922,
"step": 9158
},
{
"epoch": 241.0,
"eval_accuracy": 0.40689149560117305,
"eval_loss": 4.111790180206299,
"eval_runtime": 0.6215,
"eval_samples_per_second": 6.436,
"eval_steps_per_second": 1.609,
"step": 9158
},
{
"epoch": 242.0,
"learning_rate": 5.16e-07,
"loss": 3.7897,
"step": 9196
},
{
"epoch": 242.0,
"eval_accuracy": 0.40762463343108507,
"eval_loss": 4.109217166900635,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 9196
},
{
"epoch": 243.0,
"learning_rate": 5.14e-07,
"loss": 3.7877,
"step": 9234
},
{
"epoch": 243.0,
"eval_accuracy": 0.4078690127077224,
"eval_loss": 4.107990741729736,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 9234
},
{
"epoch": 244.0,
"learning_rate": 5.12e-07,
"loss": 3.7829,
"step": 9272
},
{
"epoch": 244.0,
"eval_accuracy": 0.4071358748778104,
"eval_loss": 4.1082682609558105,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 9272
},
{
"epoch": 245.0,
"learning_rate": 5.1e-07,
"loss": 3.7814,
"step": 9310
},
{
"epoch": 245.0,
"eval_accuracy": 0.40762463343108507,
"eval_loss": 4.108653545379639,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 9310
},
{
"epoch": 246.0,
"learning_rate": 5.079999999999999e-07,
"loss": 3.781,
"step": 9348
},
{
"epoch": 246.0,
"eval_accuracy": 0.4071358748778104,
"eval_loss": 4.1042561531066895,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 9348
},
{
"epoch": 247.0,
"learning_rate": 5.06e-07,
"loss": 3.7728,
"step": 9386
},
{
"epoch": 247.0,
"eval_accuracy": 0.40811339198435975,
"eval_loss": 4.102220058441162,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 9386
},
{
"epoch": 248.0,
"learning_rate": 5.04e-07,
"loss": 3.779,
"step": 9424
},
{
"epoch": 248.0,
"eval_accuracy": 0.40811339198435975,
"eval_loss": 4.101465225219727,
"eval_runtime": 0.6127,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 9424
},
{
"epoch": 249.0,
"learning_rate": 5.02e-07,
"loss": 3.7716,
"step": 9462
},
{
"epoch": 249.0,
"eval_accuracy": 0.4078690127077224,
"eval_loss": 4.103041172027588,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 9462
},
{
"epoch": 250.0,
"learning_rate": 5e-07,
"loss": 3.7674,
"step": 9500
},
{
"epoch": 250.0,
"eval_accuracy": 0.4078690127077224,
"eval_loss": 4.099481105804443,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 9500
},
{
"epoch": 251.0,
"learning_rate": 4.979999999999999e-07,
"loss": 3.7665,
"step": 9538
},
{
"epoch": 251.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.0990800857543945,
"eval_runtime": 0.6218,
"eval_samples_per_second": 6.433,
"eval_steps_per_second": 1.608,
"step": 9538
},
{
"epoch": 252.0,
"learning_rate": 4.96e-07,
"loss": 3.7603,
"step": 9576
},
{
"epoch": 252.0,
"eval_accuracy": 0.40738025415444773,
"eval_loss": 4.100230693817139,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 9576
},
{
"epoch": 253.0,
"learning_rate": 4.94e-07,
"loss": 3.7645,
"step": 9614
},
{
"epoch": 253.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.095699787139893,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 9614
},
{
"epoch": 254.0,
"learning_rate": 4.92e-07,
"loss": 3.7622,
"step": 9652
},
{
"epoch": 254.0,
"eval_accuracy": 0.4083577712609971,
"eval_loss": 4.0959062576293945,
"eval_runtime": 2.189,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.457,
"step": 9652
},
{
"epoch": 255.0,
"learning_rate": 4.9e-07,
"loss": 3.7583,
"step": 9690
},
{
"epoch": 255.0,
"eval_accuracy": 0.4083577712609971,
"eval_loss": 4.0954976081848145,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 9690
},
{
"epoch": 256.0,
"learning_rate": 4.879999999999999e-07,
"loss": 3.752,
"step": 9728
},
{
"epoch": 256.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.0929741859436035,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 9728
},
{
"epoch": 257.0,
"learning_rate": 4.86e-07,
"loss": 3.7545,
"step": 9766
},
{
"epoch": 257.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.0912184715271,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 9766
},
{
"epoch": 258.0,
"learning_rate": 4.839999999999999e-07,
"loss": 3.7447,
"step": 9804
},
{
"epoch": 258.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.092291831970215,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 9804
},
{
"epoch": 259.0,
"learning_rate": 4.82e-07,
"loss": 3.7483,
"step": 9842
},
{
"epoch": 259.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.089372158050537,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 9842
},
{
"epoch": 260.0,
"learning_rate": 4.8e-07,
"loss": 3.7428,
"step": 9880
},
{
"epoch": 260.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.090963840484619,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 9880
},
{
"epoch": 261.0,
"learning_rate": 4.779999999999999e-07,
"loss": 3.7407,
"step": 9918
},
{
"epoch": 261.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.087746620178223,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 9918
},
{
"epoch": 262.0,
"learning_rate": 4.76e-07,
"loss": 3.7405,
"step": 9956
},
{
"epoch": 262.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.089057922363281,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 9956
},
{
"epoch": 263.0,
"learning_rate": 4.7399999999999993e-07,
"loss": 3.7354,
"step": 9994
},
{
"epoch": 263.0,
"eval_accuracy": 0.4088465298142718,
"eval_loss": 4.0869574546813965,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 9994
},
{
"epoch": 264.0,
"learning_rate": 4.7199999999999994e-07,
"loss": 3.7353,
"step": 10032
},
{
"epoch": 264.0,
"eval_accuracy": 0.40860215053763443,
"eval_loss": 4.085577487945557,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 10032
},
{
"epoch": 265.0,
"learning_rate": 4.6999999999999995e-07,
"loss": 3.7312,
"step": 10070
},
{
"epoch": 265.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.083754062652588,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 10070
},
{
"epoch": 266.0,
"learning_rate": 4.68e-07,
"loss": 3.7313,
"step": 10108
},
{
"epoch": 266.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.082942485809326,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 10108
},
{
"epoch": 267.0,
"learning_rate": 4.66e-07,
"loss": 3.7264,
"step": 10146
},
{
"epoch": 267.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.0826802253723145,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 10146
},
{
"epoch": 268.0,
"learning_rate": 4.64e-07,
"loss": 3.7221,
"step": 10184
},
{
"epoch": 268.0,
"eval_accuracy": 0.40933528836754646,
"eval_loss": 4.081498622894287,
"eval_runtime": 0.6152,
"eval_samples_per_second": 6.502,
"eval_steps_per_second": 1.625,
"step": 10184
},
{
"epoch": 269.0,
"learning_rate": 4.62e-07,
"loss": 3.7211,
"step": 10222
},
{
"epoch": 269.0,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 4.0801472663879395,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 10222
},
{
"epoch": 270.0,
"learning_rate": 4.6e-07,
"loss": 3.7232,
"step": 10260
},
{
"epoch": 270.0,
"eval_accuracy": 0.40933528836754646,
"eval_loss": 4.0787458419799805,
"eval_runtime": 0.6151,
"eval_samples_per_second": 6.503,
"eval_steps_per_second": 1.626,
"step": 10260
},
{
"epoch": 271.0,
"learning_rate": 4.58e-07,
"loss": 3.718,
"step": 10298
},
{
"epoch": 271.0,
"eval_accuracy": 0.4100684261974585,
"eval_loss": 4.07801628112793,
"eval_runtime": 0.6249,
"eval_samples_per_second": 6.401,
"eval_steps_per_second": 1.6,
"step": 10298
},
{
"epoch": 272.0,
"learning_rate": 4.56e-07,
"loss": 3.7208,
"step": 10336
},
{
"epoch": 272.0,
"eval_accuracy": 0.4108015640273705,
"eval_loss": 4.077081203460693,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 10336
},
{
"epoch": 273.0,
"learning_rate": 4.54e-07,
"loss": 3.7109,
"step": 10374
},
{
"epoch": 273.0,
"eval_accuracy": 0.4115347018572825,
"eval_loss": 4.07664155960083,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 10374
},
{
"epoch": 274.0,
"learning_rate": 4.5199999999999997e-07,
"loss": 3.7146,
"step": 10412
},
{
"epoch": 274.0,
"eval_accuracy": 0.41104594330400784,
"eval_loss": 4.073920249938965,
"eval_runtime": 0.626,
"eval_samples_per_second": 6.39,
"eval_steps_per_second": 1.597,
"step": 10412
},
{
"epoch": 275.0,
"learning_rate": 4.5e-07,
"loss": 3.7071,
"step": 10450
},
{
"epoch": 275.0,
"eval_accuracy": 0.41177908113391987,
"eval_loss": 4.073719501495361,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 10450
},
{
"epoch": 276.0,
"learning_rate": 4.48e-07,
"loss": 3.7044,
"step": 10488
},
{
"epoch": 276.0,
"eval_accuracy": 0.41226783968719455,
"eval_loss": 4.074197769165039,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 10488
},
{
"epoch": 277.0,
"learning_rate": 4.46e-07,
"loss": 3.7094,
"step": 10526
},
{
"epoch": 277.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.071889400482178,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 10526
},
{
"epoch": 278.0,
"learning_rate": 4.44e-07,
"loss": 3.7028,
"step": 10564
},
{
"epoch": 278.0,
"eval_accuracy": 0.4120234604105572,
"eval_loss": 4.071835994720459,
"eval_runtime": 0.6231,
"eval_samples_per_second": 6.419,
"eval_steps_per_second": 1.605,
"step": 10564
},
{
"epoch": 279.0,
"learning_rate": 4.4199999999999996e-07,
"loss": 3.7051,
"step": 10602
},
{
"epoch": 279.0,
"eval_accuracy": 0.4120234604105572,
"eval_loss": 4.069863319396973,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 10602
},
{
"epoch": 280.0,
"learning_rate": 4.3999999999999997e-07,
"loss": 3.7011,
"step": 10640
},
{
"epoch": 280.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.068091869354248,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 10640
},
{
"epoch": 281.0,
"learning_rate": 4.38e-07,
"loss": 3.6954,
"step": 10678
},
{
"epoch": 281.0,
"eval_accuracy": 0.4120234604105572,
"eval_loss": 4.066802501678467,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 10678
},
{
"epoch": 282.0,
"learning_rate": 4.36e-07,
"loss": 3.6933,
"step": 10716
},
{
"epoch": 282.0,
"eval_accuracy": 0.41226783968719455,
"eval_loss": 4.066892623901367,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 10716
},
{
"epoch": 283.0,
"learning_rate": 4.34e-07,
"loss": 3.6935,
"step": 10754
},
{
"epoch": 283.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.063753128051758,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 10754
},
{
"epoch": 284.0,
"learning_rate": 4.3199999999999995e-07,
"loss": 3.6867,
"step": 10792
},
{
"epoch": 284.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.065001964569092,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 10792
},
{
"epoch": 285.0,
"learning_rate": 4.2999999999999996e-07,
"loss": 3.6888,
"step": 10830
},
{
"epoch": 285.0,
"eval_accuracy": 0.4120234604105572,
"eval_loss": 4.0640668869018555,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 10830
},
{
"epoch": 286.0,
"learning_rate": 4.2799999999999997e-07,
"loss": 3.6843,
"step": 10868
},
{
"epoch": 286.0,
"eval_accuracy": 0.4115347018572825,
"eval_loss": 4.0637993812561035,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 10868
},
{
"epoch": 287.0,
"learning_rate": 4.26e-07,
"loss": 3.6824,
"step": 10906
},
{
"epoch": 287.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.06214714050293,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 10906
},
{
"epoch": 288.0,
"learning_rate": 4.24e-07,
"loss": 3.6821,
"step": 10944
},
{
"epoch": 288.0,
"eval_accuracy": 0.41226783968719455,
"eval_loss": 4.060315132141113,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 10944
},
{
"epoch": 289.0,
"learning_rate": 4.2199999999999994e-07,
"loss": 3.6802,
"step": 10982
},
{
"epoch": 289.0,
"eval_accuracy": 0.4125122189638319,
"eval_loss": 4.062171459197998,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 10982
},
{
"epoch": 290.0,
"learning_rate": 4.1999999999999995e-07,
"loss": 3.6789,
"step": 11020
},
{
"epoch": 290.0,
"eval_accuracy": 0.41275659824046923,
"eval_loss": 4.057875633239746,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 11020
},
{
"epoch": 291.0,
"learning_rate": 4.1799999999999996e-07,
"loss": 3.6767,
"step": 11058
},
{
"epoch": 291.0,
"eval_accuracy": 0.41300097751710657,
"eval_loss": 4.057925701141357,
"eval_runtime": 0.6126,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.632,
"step": 11058
},
{
"epoch": 292.0,
"learning_rate": 4.1599999999999997e-07,
"loss": 3.6751,
"step": 11096
},
{
"epoch": 292.0,
"eval_accuracy": 0.4137341153470186,
"eval_loss": 4.058208465576172,
"eval_runtime": 0.6175,
"eval_samples_per_second": 6.478,
"eval_steps_per_second": 1.62,
"step": 11096
},
{
"epoch": 293.0,
"learning_rate": 4.14e-07,
"loss": 3.6726,
"step": 11134
},
{
"epoch": 293.0,
"eval_accuracy": 0.4137341153470186,
"eval_loss": 4.055559158325195,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 11134
},
{
"epoch": 294.0,
"learning_rate": 4.12e-07,
"loss": 3.6704,
"step": 11172
},
{
"epoch": 294.0,
"eval_accuracy": 0.4137341153470186,
"eval_loss": 4.058291435241699,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 11172
},
{
"epoch": 295.0,
"learning_rate": 4.0999999999999994e-07,
"loss": 3.6703,
"step": 11210
},
{
"epoch": 295.0,
"eval_accuracy": 0.4142228739002933,
"eval_loss": 4.055552005767822,
"eval_runtime": 0.626,
"eval_samples_per_second": 6.39,
"eval_steps_per_second": 1.598,
"step": 11210
},
{
"epoch": 296.0,
"learning_rate": 4.0799999999999995e-07,
"loss": 3.6662,
"step": 11248
},
{
"epoch": 296.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.05183219909668,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 11248
},
{
"epoch": 297.0,
"learning_rate": 4.06e-07,
"loss": 3.6643,
"step": 11286
},
{
"epoch": 297.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.05209493637085,
"eval_runtime": 0.626,
"eval_samples_per_second": 6.39,
"eval_steps_per_second": 1.597,
"step": 11286
},
{
"epoch": 298.0,
"learning_rate": 4.04e-07,
"loss": 3.6623,
"step": 11324
},
{
"epoch": 298.0,
"eval_accuracy": 0.4144672531769306,
"eval_loss": 4.054409980773926,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 11324
},
{
"epoch": 299.0,
"learning_rate": 4.02e-07,
"loss": 3.6626,
"step": 11362
},
{
"epoch": 299.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.051777362823486,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 11362
},
{
"epoch": 300.0,
"learning_rate": 4e-07,
"loss": 3.661,
"step": 11400
},
{
"epoch": 300.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.049643516540527,
"eval_runtime": 0.7928,
"eval_samples_per_second": 5.046,
"eval_steps_per_second": 1.261,
"step": 11400
},
{
"epoch": 301.0,
"learning_rate": 3.98e-07,
"loss": 3.6553,
"step": 11438
},
{
"epoch": 301.0,
"eval_accuracy": 0.4149560117302053,
"eval_loss": 4.048153400421143,
"eval_runtime": 0.6123,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 11438
},
{
"epoch": 302.0,
"learning_rate": 3.96e-07,
"loss": 3.6573,
"step": 11476
},
{
"epoch": 302.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.047247886657715,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 11476
},
{
"epoch": 303.0,
"learning_rate": 3.94e-07,
"loss": 3.6548,
"step": 11514
},
{
"epoch": 303.0,
"eval_accuracy": 0.41520039100684264,
"eval_loss": 4.046008586883545,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 11514
},
{
"epoch": 304.0,
"learning_rate": 3.92e-07,
"loss": 3.6531,
"step": 11552
},
{
"epoch": 304.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.046994209289551,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 11552
},
{
"epoch": 305.0,
"learning_rate": 3.8999999999999997e-07,
"loss": 3.6549,
"step": 11590
},
{
"epoch": 305.0,
"eval_accuracy": 0.4149560117302053,
"eval_loss": 4.046128273010254,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 11590
},
{
"epoch": 306.0,
"learning_rate": 3.88e-07,
"loss": 3.6485,
"step": 11628
},
{
"epoch": 306.0,
"eval_accuracy": 0.41471163245356796,
"eval_loss": 4.0460734367370605,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 11628
},
{
"epoch": 307.0,
"learning_rate": 3.86e-07,
"loss": 3.6441,
"step": 11666
},
{
"epoch": 307.0,
"eval_accuracy": 0.4149560117302053,
"eval_loss": 4.046470642089844,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 11666
},
{
"epoch": 308.0,
"learning_rate": 3.84e-07,
"loss": 3.6438,
"step": 11704
},
{
"epoch": 308.0,
"eval_accuracy": 0.41593352883675466,
"eval_loss": 4.042454719543457,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 11704
},
{
"epoch": 309.0,
"learning_rate": 3.82e-07,
"loss": 3.6435,
"step": 11742
},
{
"epoch": 309.0,
"eval_accuracy": 0.4156891495601173,
"eval_loss": 4.040951251983643,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 11742
},
{
"epoch": 310.0,
"learning_rate": 3.7999999999999996e-07,
"loss": 3.6397,
"step": 11780
},
{
"epoch": 310.0,
"eval_accuracy": 0.41593352883675466,
"eval_loss": 4.040650844573975,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 11780
},
{
"epoch": 311.0,
"learning_rate": 3.7799999999999997e-07,
"loss": 3.6363,
"step": 11818
},
{
"epoch": 311.0,
"eval_accuracy": 0.41544477028348,
"eval_loss": 4.042422294616699,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 11818
},
{
"epoch": 312.0,
"learning_rate": 3.76e-07,
"loss": 3.6315,
"step": 11856
},
{
"epoch": 312.0,
"eval_accuracy": 0.41544477028348,
"eval_loss": 4.043632984161377,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 11856
},
{
"epoch": 313.0,
"learning_rate": 3.74e-07,
"loss": 3.6323,
"step": 11894
},
{
"epoch": 313.0,
"eval_accuracy": 0.4156891495601173,
"eval_loss": 4.040919303894043,
"eval_runtime": 0.628,
"eval_samples_per_second": 6.369,
"eval_steps_per_second": 1.592,
"step": 11894
},
{
"epoch": 314.0,
"learning_rate": 3.72e-07,
"loss": 3.6386,
"step": 11932
},
{
"epoch": 314.0,
"eval_accuracy": 0.4156891495601173,
"eval_loss": 4.038565158843994,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 11932
},
{
"epoch": 315.0,
"learning_rate": 3.7e-07,
"loss": 3.6303,
"step": 11970
},
{
"epoch": 315.0,
"eval_accuracy": 0.41544477028348,
"eval_loss": 4.0388689041137695,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 11970
},
{
"epoch": 316.0,
"learning_rate": 3.6799999999999996e-07,
"loss": 3.6336,
"step": 12008
},
{
"epoch": 316.0,
"eval_accuracy": 0.41642228739002934,
"eval_loss": 4.039405345916748,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 12008
},
{
"epoch": 317.0,
"learning_rate": 3.6599999999999997e-07,
"loss": 3.6281,
"step": 12046
},
{
"epoch": 317.0,
"eval_accuracy": 0.4166666666666667,
"eval_loss": 4.038857460021973,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 12046
},
{
"epoch": 318.0,
"learning_rate": 3.64e-07,
"loss": 3.6249,
"step": 12084
},
{
"epoch": 318.0,
"eval_accuracy": 0.41764418377321605,
"eval_loss": 4.037881374359131,
"eval_runtime": 0.7782,
"eval_samples_per_second": 5.14,
"eval_steps_per_second": 1.285,
"step": 12084
},
{
"epoch": 319.0,
"learning_rate": 3.62e-07,
"loss": 3.6277,
"step": 12122
},
{
"epoch": 319.0,
"eval_accuracy": 0.41764418377321605,
"eval_loss": 4.037135601043701,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 12122
},
{
"epoch": 320.0,
"learning_rate": 3.6e-07,
"loss": 3.6232,
"step": 12160
},
{
"epoch": 320.0,
"eval_accuracy": 0.41715542521994137,
"eval_loss": 4.035280704498291,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 12160
},
{
"epoch": 321.0,
"learning_rate": 3.5799999999999995e-07,
"loss": 3.6177,
"step": 12198
},
{
"epoch": 321.0,
"eval_accuracy": 0.41764418377321605,
"eval_loss": 4.036287307739258,
"eval_runtime": 0.6153,
"eval_samples_per_second": 6.501,
"eval_steps_per_second": 1.625,
"step": 12198
},
{
"epoch": 322.0,
"learning_rate": 3.5599999999999996e-07,
"loss": 3.626,
"step": 12236
},
{
"epoch": 322.0,
"eval_accuracy": 0.4173998044965787,
"eval_loss": 4.031866073608398,
"eval_runtime": 0.6128,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 12236
},
{
"epoch": 323.0,
"learning_rate": 3.5399999999999997e-07,
"loss": 3.6181,
"step": 12274
},
{
"epoch": 323.0,
"eval_accuracy": 0.41715542521994137,
"eval_loss": 4.031935691833496,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 12274
},
{
"epoch": 324.0,
"learning_rate": 3.52e-07,
"loss": 3.6183,
"step": 12312
},
{
"epoch": 324.0,
"eval_accuracy": 0.41764418377321605,
"eval_loss": 4.03291130065918,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 12312
},
{
"epoch": 325.0,
"learning_rate": 3.5e-07,
"loss": 3.6169,
"step": 12350
},
{
"epoch": 325.0,
"eval_accuracy": 0.41764418377321605,
"eval_loss": 4.032841682434082,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 12350
},
{
"epoch": 326.0,
"learning_rate": 3.4799999999999994e-07,
"loss": 3.6094,
"step": 12388
},
{
"epoch": 326.0,
"eval_accuracy": 0.4178885630498534,
"eval_loss": 4.031832218170166,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 12388
},
{
"epoch": 327.0,
"learning_rate": 3.4599999999999995e-07,
"loss": 3.6138,
"step": 12426
},
{
"epoch": 327.0,
"eval_accuracy": 0.4178885630498534,
"eval_loss": 4.029395580291748,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 12426
},
{
"epoch": 328.0,
"learning_rate": 3.4399999999999996e-07,
"loss": 3.6101,
"step": 12464
},
{
"epoch": 328.0,
"eval_accuracy": 0.41813294232649073,
"eval_loss": 4.031092166900635,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 12464
},
{
"epoch": 329.0,
"learning_rate": 3.42e-07,
"loss": 3.6062,
"step": 12502
},
{
"epoch": 329.0,
"eval_accuracy": 0.41837732160312807,
"eval_loss": 4.029919624328613,
"eval_runtime": 0.6173,
"eval_samples_per_second": 6.48,
"eval_steps_per_second": 1.62,
"step": 12502
},
{
"epoch": 330.0,
"learning_rate": 3.4000000000000003e-07,
"loss": 3.6093,
"step": 12540
},
{
"epoch": 330.0,
"eval_accuracy": 0.41813294232649073,
"eval_loss": 4.027568817138672,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 12540
},
{
"epoch": 331.0,
"learning_rate": 3.38e-07,
"loss": 3.6071,
"step": 12578
},
{
"epoch": 331.0,
"eval_accuracy": 0.41813294232649073,
"eval_loss": 4.030076503753662,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 12578
},
{
"epoch": 332.0,
"learning_rate": 3.36e-07,
"loss": 3.6064,
"step": 12616
},
{
"epoch": 332.0,
"eval_accuracy": 0.41837732160312807,
"eval_loss": 4.027680397033691,
"eval_runtime": 0.6131,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 12616
},
{
"epoch": 333.0,
"learning_rate": 3.34e-07,
"loss": 3.5982,
"step": 12654
},
{
"epoch": 333.0,
"eval_accuracy": 0.41837732160312807,
"eval_loss": 4.028773784637451,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 12654
},
{
"epoch": 334.0,
"learning_rate": 3.32e-07,
"loss": 3.6064,
"step": 12692
},
{
"epoch": 334.0,
"eval_accuracy": 0.4178885630498534,
"eval_loss": 4.0255818367004395,
"eval_runtime": 0.6242,
"eval_samples_per_second": 6.408,
"eval_steps_per_second": 1.602,
"step": 12692
},
{
"epoch": 335.0,
"learning_rate": 3.3e-07,
"loss": 3.6023,
"step": 12730
},
{
"epoch": 335.0,
"eval_accuracy": 0.41837732160312807,
"eval_loss": 4.025238037109375,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 12730
},
{
"epoch": 336.0,
"learning_rate": 3.28e-07,
"loss": 3.5992,
"step": 12768
},
{
"epoch": 336.0,
"eval_accuracy": 0.4186217008797654,
"eval_loss": 4.024014472961426,
"eval_runtime": 0.6127,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 12768
},
{
"epoch": 337.0,
"learning_rate": 3.26e-07,
"loss": 3.5997,
"step": 12806
},
{
"epoch": 337.0,
"eval_accuracy": 0.41886608015640275,
"eval_loss": 4.0236945152282715,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 12806
},
{
"epoch": 338.0,
"learning_rate": 3.24e-07,
"loss": 3.5955,
"step": 12844
},
{
"epoch": 338.0,
"eval_accuracy": 0.4186217008797654,
"eval_loss": 4.02353048324585,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 12844
},
{
"epoch": 339.0,
"learning_rate": 3.22e-07,
"loss": 3.5929,
"step": 12882
},
{
"epoch": 339.0,
"eval_accuracy": 0.4186217008797654,
"eval_loss": 4.023321151733398,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 12882
},
{
"epoch": 340.0,
"learning_rate": 3.2e-07,
"loss": 3.5953,
"step": 12920
},
{
"epoch": 340.0,
"eval_accuracy": 0.41886608015640275,
"eval_loss": 4.020965099334717,
"eval_runtime": 0.6126,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.632,
"step": 12920
},
{
"epoch": 341.0,
"learning_rate": 3.18e-07,
"loss": 3.5915,
"step": 12958
},
{
"epoch": 341.0,
"eval_accuracy": 0.41837732160312807,
"eval_loss": 4.020979404449463,
"eval_runtime": 0.6256,
"eval_samples_per_second": 6.394,
"eval_steps_per_second": 1.598,
"step": 12958
},
{
"epoch": 342.0,
"learning_rate": 3.1599999999999997e-07,
"loss": 3.5835,
"step": 12996
},
{
"epoch": 342.0,
"eval_accuracy": 0.41886608015640275,
"eval_loss": 4.022586345672607,
"eval_runtime": 0.6251,
"eval_samples_per_second": 6.399,
"eval_steps_per_second": 1.6,
"step": 12996
},
{
"epoch": 343.0,
"learning_rate": 3.14e-07,
"loss": 3.5852,
"step": 13034
},
{
"epoch": 343.0,
"eval_accuracy": 0.41886608015640275,
"eval_loss": 4.022684574127197,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 13034
},
{
"epoch": 344.0,
"learning_rate": 3.12e-07,
"loss": 3.5894,
"step": 13072
},
{
"epoch": 344.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.022200584411621,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 13072
},
{
"epoch": 345.0,
"learning_rate": 3.1e-07,
"loss": 3.5864,
"step": 13110
},
{
"epoch": 345.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.022695541381836,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 13110
},
{
"epoch": 346.0,
"learning_rate": 3.08e-07,
"loss": 3.5854,
"step": 13148
},
{
"epoch": 346.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.018957138061523,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 13148
},
{
"epoch": 347.0,
"learning_rate": 3.0599999999999996e-07,
"loss": 3.5841,
"step": 13186
},
{
"epoch": 347.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.017984390258789,
"eval_runtime": 0.6155,
"eval_samples_per_second": 6.499,
"eval_steps_per_second": 1.625,
"step": 13186
},
{
"epoch": 348.0,
"learning_rate": 3.0399999999999997e-07,
"loss": 3.5821,
"step": 13224
},
{
"epoch": 348.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.018927097320557,
"eval_runtime": 0.6152,
"eval_samples_per_second": 6.502,
"eval_steps_per_second": 1.626,
"step": 13224
},
{
"epoch": 349.0,
"learning_rate": 3.02e-07,
"loss": 3.5823,
"step": 13262
},
{
"epoch": 349.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.0175862312316895,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 13262
},
{
"epoch": 350.0,
"learning_rate": 3e-07,
"loss": 3.5772,
"step": 13300
},
{
"epoch": 350.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.016434669494629,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 13300
},
{
"epoch": 351.0,
"learning_rate": 2.98e-07,
"loss": 3.5827,
"step": 13338
},
{
"epoch": 351.0,
"eval_accuracy": 0.4186217008797654,
"eval_loss": 4.014683723449707,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 13338
},
{
"epoch": 352.0,
"learning_rate": 2.9599999999999995e-07,
"loss": 3.5747,
"step": 13376
},
{
"epoch": 352.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.014786720275879,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 13376
},
{
"epoch": 353.0,
"learning_rate": 2.9399999999999996e-07,
"loss": 3.5745,
"step": 13414
},
{
"epoch": 353.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.016923904418945,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 13414
},
{
"epoch": 354.0,
"learning_rate": 2.9199999999999997e-07,
"loss": 3.576,
"step": 13452
},
{
"epoch": 354.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.0161919593811035,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 13452
},
{
"epoch": 355.0,
"learning_rate": 2.9e-07,
"loss": 3.5723,
"step": 13490
},
{
"epoch": 355.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.012264728546143,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 13490
},
{
"epoch": 356.0,
"learning_rate": 2.88e-07,
"loss": 3.5669,
"step": 13528
},
{
"epoch": 356.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.014427185058594,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 13528
},
{
"epoch": 357.0,
"learning_rate": 2.8599999999999994e-07,
"loss": 3.5721,
"step": 13566
},
{
"epoch": 357.0,
"eval_accuracy": 0.41886608015640275,
"eval_loss": 4.0136189460754395,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 13566
},
{
"epoch": 358.0,
"learning_rate": 2.8399999999999995e-07,
"loss": 3.5725,
"step": 13604
},
{
"epoch": 358.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.01244592666626,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 13604
},
{
"epoch": 359.0,
"learning_rate": 2.8199999999999996e-07,
"loss": 3.5627,
"step": 13642
},
{
"epoch": 359.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.012938976287842,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 13642
},
{
"epoch": 360.0,
"learning_rate": 2.8e-07,
"loss": 3.5632,
"step": 13680
},
{
"epoch": 360.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.012718677520752,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 13680
},
{
"epoch": 361.0,
"learning_rate": 2.7800000000000003e-07,
"loss": 3.5641,
"step": 13718
},
{
"epoch": 361.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.01040506362915,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 13718
},
{
"epoch": 362.0,
"learning_rate": 2.7600000000000004e-07,
"loss": 3.5636,
"step": 13756
},
{
"epoch": 362.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.010016918182373,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 13756
},
{
"epoch": 363.0,
"learning_rate": 2.74e-07,
"loss": 3.5566,
"step": 13794
},
{
"epoch": 363.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.01265811920166,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 13794
},
{
"epoch": 364.0,
"learning_rate": 2.72e-07,
"loss": 3.5556,
"step": 13832
},
{
"epoch": 364.0,
"eval_accuracy": 0.4198435972629521,
"eval_loss": 4.013090133666992,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 13832
},
{
"epoch": 365.0,
"learning_rate": 2.7e-07,
"loss": 3.5606,
"step": 13870
},
{
"epoch": 365.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.01081657409668,
"eval_runtime": 0.6265,
"eval_samples_per_second": 6.385,
"eval_steps_per_second": 1.596,
"step": 13870
},
{
"epoch": 366.0,
"learning_rate": 2.68e-07,
"loss": 3.5573,
"step": 13908
},
{
"epoch": 366.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.009543418884277,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 13908
},
{
"epoch": 367.0,
"learning_rate": 2.66e-07,
"loss": 3.5603,
"step": 13946
},
{
"epoch": 367.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.007948875427246,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 13946
},
{
"epoch": 368.0,
"learning_rate": 2.64e-07,
"loss": 3.5552,
"step": 13984
},
{
"epoch": 368.0,
"eval_accuracy": 0.4191104594330401,
"eval_loss": 4.007278919219971,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 13984
},
{
"epoch": 369.0,
"learning_rate": 2.62e-07,
"loss": 3.5594,
"step": 14022
},
{
"epoch": 369.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.007977485656738,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 14022
},
{
"epoch": 370.0,
"learning_rate": 2.6e-07,
"loss": 3.5557,
"step": 14060
},
{
"epoch": 370.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.006712913513184,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 14060
},
{
"epoch": 371.0,
"learning_rate": 2.58e-07,
"loss": 3.5523,
"step": 14098
},
{
"epoch": 371.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.006473541259766,
"eval_runtime": 0.615,
"eval_samples_per_second": 6.504,
"eval_steps_per_second": 1.626,
"step": 14098
},
{
"epoch": 372.0,
"learning_rate": 2.56e-07,
"loss": 3.5516,
"step": 14136
},
{
"epoch": 372.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.007019519805908,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 14136
},
{
"epoch": 373.0,
"learning_rate": 2.5399999999999997e-07,
"loss": 3.5466,
"step": 14174
},
{
"epoch": 373.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.007321834564209,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 14174
},
{
"epoch": 374.0,
"learning_rate": 2.52e-07,
"loss": 3.5474,
"step": 14212
},
{
"epoch": 374.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.004045486450195,
"eval_runtime": 0.6268,
"eval_samples_per_second": 6.381,
"eval_steps_per_second": 1.595,
"step": 14212
},
{
"epoch": 375.0,
"learning_rate": 2.5e-07,
"loss": 3.5481,
"step": 14250
},
{
"epoch": 375.0,
"eval_accuracy": 0.4195992179863148,
"eval_loss": 4.003184795379639,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 14250
},
{
"epoch": 376.0,
"learning_rate": 2.48e-07,
"loss": 3.5496,
"step": 14288
},
{
"epoch": 376.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.00510311126709,
"eval_runtime": 0.6153,
"eval_samples_per_second": 6.501,
"eval_steps_per_second": 1.625,
"step": 14288
},
{
"epoch": 377.0,
"learning_rate": 2.46e-07,
"loss": 3.5489,
"step": 14326
},
{
"epoch": 377.0,
"eval_accuracy": 0.41935483870967744,
"eval_loss": 4.003530502319336,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 14326
},
{
"epoch": 378.0,
"learning_rate": 2.4399999999999996e-07,
"loss": 3.5439,
"step": 14364
},
{
"epoch": 378.0,
"eval_accuracy": 0.4198435972629521,
"eval_loss": 4.0032219886779785,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 14364
},
{
"epoch": 379.0,
"learning_rate": 2.4199999999999997e-07,
"loss": 3.5464,
"step": 14402
},
{
"epoch": 379.0,
"eval_accuracy": 0.42057673509286414,
"eval_loss": 4.002893924713135,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 14402
},
{
"epoch": 380.0,
"learning_rate": 2.4e-07,
"loss": 3.5455,
"step": 14440
},
{
"epoch": 380.0,
"eval_accuracy": 0.4198435972629521,
"eval_loss": 4.003747463226318,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 14440
},
{
"epoch": 381.0,
"learning_rate": 2.38e-07,
"loss": 3.5439,
"step": 14478
},
{
"epoch": 381.0,
"eval_accuracy": 0.42057673509286414,
"eval_loss": 4.002392292022705,
"eval_runtime": 0.6152,
"eval_samples_per_second": 6.502,
"eval_steps_per_second": 1.626,
"step": 14478
},
{
"epoch": 382.0,
"learning_rate": 2.3599999999999997e-07,
"loss": 3.542,
"step": 14516
},
{
"epoch": 382.0,
"eval_accuracy": 0.4203323558162268,
"eval_loss": 4.001096725463867,
"eval_runtime": 0.6126,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 14516
},
{
"epoch": 383.0,
"learning_rate": 2.34e-07,
"loss": 3.5366,
"step": 14554
},
{
"epoch": 383.0,
"eval_accuracy": 0.4203323558162268,
"eval_loss": 4.001129150390625,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 14554
},
{
"epoch": 384.0,
"learning_rate": 2.32e-07,
"loss": 3.5368,
"step": 14592
},
{
"epoch": 384.0,
"eval_accuracy": 0.42057673509286414,
"eval_loss": 4.001524448394775,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 14592
},
{
"epoch": 385.0,
"learning_rate": 2.3e-07,
"loss": 3.5382,
"step": 14630
},
{
"epoch": 385.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 4.0017523765563965,
"eval_runtime": 0.6151,
"eval_samples_per_second": 6.503,
"eval_steps_per_second": 1.626,
"step": 14630
},
{
"epoch": 386.0,
"learning_rate": 2.28e-07,
"loss": 3.5358,
"step": 14668
},
{
"epoch": 386.0,
"eval_accuracy": 0.42008797653958946,
"eval_loss": 4.000154495239258,
"eval_runtime": 0.6297,
"eval_samples_per_second": 6.352,
"eval_steps_per_second": 1.588,
"step": 14668
},
{
"epoch": 387.0,
"learning_rate": 2.2599999999999999e-07,
"loss": 3.5324,
"step": 14706
},
{
"epoch": 387.0,
"eval_accuracy": 0.4198435972629521,
"eval_loss": 3.9989571571350098,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 14706
},
{
"epoch": 388.0,
"learning_rate": 2.24e-07,
"loss": 3.5378,
"step": 14744
},
{
"epoch": 388.0,
"eval_accuracy": 0.42057673509286414,
"eval_loss": 4.000235080718994,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 14744
},
{
"epoch": 389.0,
"learning_rate": 2.22e-07,
"loss": 3.5334,
"step": 14782
},
{
"epoch": 389.0,
"eval_accuracy": 0.4208211143695015,
"eval_loss": 3.9985251426696777,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 14782
},
{
"epoch": 390.0,
"learning_rate": 2.1999999999999998e-07,
"loss": 3.5349,
"step": 14820
},
{
"epoch": 390.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.998689651489258,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 14820
},
{
"epoch": 391.0,
"learning_rate": 2.18e-07,
"loss": 3.5378,
"step": 14858
},
{
"epoch": 391.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.9983861446380615,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 14858
},
{
"epoch": 392.0,
"learning_rate": 2.1599999999999998e-07,
"loss": 3.5304,
"step": 14896
},
{
"epoch": 392.0,
"eval_accuracy": 0.42057673509286414,
"eval_loss": 3.9976606369018555,
"eval_runtime": 0.6239,
"eval_samples_per_second": 6.411,
"eval_steps_per_second": 1.603,
"step": 14896
},
{
"epoch": 393.0,
"learning_rate": 2.1399999999999998e-07,
"loss": 3.5241,
"step": 14934
},
{
"epoch": 393.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9984891414642334,
"eval_runtime": 0.6127,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 14934
},
{
"epoch": 394.0,
"learning_rate": 2.12e-07,
"loss": 3.527,
"step": 14972
},
{
"epoch": 394.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.9997339248657227,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 14972
},
{
"epoch": 395.0,
"learning_rate": 2.0999999999999997e-07,
"loss": 3.5261,
"step": 15010
},
{
"epoch": 395.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.9985299110412598,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 15010
},
{
"epoch": 396.0,
"learning_rate": 2.0799999999999998e-07,
"loss": 3.5233,
"step": 15048
},
{
"epoch": 396.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.9982762336730957,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 15048
},
{
"epoch": 397.0,
"learning_rate": 2.06e-07,
"loss": 3.5279,
"step": 15086
},
{
"epoch": 397.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9965884685516357,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 15086
},
{
"epoch": 398.0,
"learning_rate": 2.0399999999999997e-07,
"loss": 3.5276,
"step": 15124
},
{
"epoch": 398.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.995763063430786,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 15124
},
{
"epoch": 399.0,
"learning_rate": 2.02e-07,
"loss": 3.5214,
"step": 15162
},
{
"epoch": 399.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9957404136657715,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 15162
},
{
"epoch": 400.0,
"learning_rate": 2e-07,
"loss": 3.5222,
"step": 15200
},
{
"epoch": 400.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.995762586593628,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 15200
},
{
"epoch": 401.0,
"learning_rate": 1.98e-07,
"loss": 3.5163,
"step": 15238
},
{
"epoch": 401.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9957165718078613,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 15238
},
{
"epoch": 402.0,
"learning_rate": 1.96e-07,
"loss": 3.5208,
"step": 15276
},
{
"epoch": 402.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.995258092880249,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 15276
},
{
"epoch": 403.0,
"learning_rate": 1.94e-07,
"loss": 3.5168,
"step": 15314
},
{
"epoch": 403.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.994943380355835,
"eval_runtime": 0.616,
"eval_samples_per_second": 6.494,
"eval_steps_per_second": 1.623,
"step": 15314
},
{
"epoch": 404.0,
"learning_rate": 1.92e-07,
"loss": 3.5242,
"step": 15352
},
{
"epoch": 404.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.994105577468872,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 15352
},
{
"epoch": 405.0,
"learning_rate": 1.8999999999999998e-07,
"loss": 3.5205,
"step": 15390
},
{
"epoch": 405.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.993699789047241,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 15390
},
{
"epoch": 406.0,
"learning_rate": 1.88e-07,
"loss": 3.5158,
"step": 15428
},
{
"epoch": 406.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9949395656585693,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 15428
},
{
"epoch": 407.0,
"learning_rate": 1.86e-07,
"loss": 3.517,
"step": 15466
},
{
"epoch": 407.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9939072132110596,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 15466
},
{
"epoch": 408.0,
"learning_rate": 1.8399999999999998e-07,
"loss": 3.519,
"step": 15504
},
{
"epoch": 408.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.9944329261779785,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 15504
},
{
"epoch": 409.0,
"learning_rate": 1.82e-07,
"loss": 3.5164,
"step": 15542
},
{
"epoch": 409.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9929213523864746,
"eval_runtime": 0.6175,
"eval_samples_per_second": 6.478,
"eval_steps_per_second": 1.619,
"step": 15542
},
{
"epoch": 410.0,
"learning_rate": 1.8e-07,
"loss": 3.5133,
"step": 15580
},
{
"epoch": 410.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.9925248622894287,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 15580
},
{
"epoch": 411.0,
"learning_rate": 1.7799999999999998e-07,
"loss": 3.5199,
"step": 15618
},
{
"epoch": 411.0,
"eval_accuracy": 0.4210654936461388,
"eval_loss": 3.9905953407287598,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 15618
},
{
"epoch": 412.0,
"learning_rate": 1.76e-07,
"loss": 3.5117,
"step": 15656
},
{
"epoch": 412.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.9919614791870117,
"eval_runtime": 0.6189,
"eval_samples_per_second": 6.463,
"eval_steps_per_second": 1.616,
"step": 15656
},
{
"epoch": 413.0,
"learning_rate": 1.7399999999999997e-07,
"loss": 3.5151,
"step": 15694
},
{
"epoch": 413.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9906229972839355,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 15694
},
{
"epoch": 414.0,
"learning_rate": 1.7199999999999998e-07,
"loss": 3.5093,
"step": 15732
},
{
"epoch": 414.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9914052486419678,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 15732
},
{
"epoch": 415.0,
"learning_rate": 1.7000000000000001e-07,
"loss": 3.512,
"step": 15770
},
{
"epoch": 415.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.9908926486968994,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 15770
},
{
"epoch": 416.0,
"learning_rate": 1.68e-07,
"loss": 3.5076,
"step": 15808
},
{
"epoch": 416.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9911580085754395,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 15808
},
{
"epoch": 417.0,
"learning_rate": 1.66e-07,
"loss": 3.5059,
"step": 15846
},
{
"epoch": 417.0,
"eval_accuracy": 0.4220430107526882,
"eval_loss": 3.9916296005249023,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 15846
},
{
"epoch": 418.0,
"learning_rate": 1.64e-07,
"loss": 3.5096,
"step": 15884
},
{
"epoch": 418.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.990671396255493,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 15884
},
{
"epoch": 419.0,
"learning_rate": 1.62e-07,
"loss": 3.5038,
"step": 15922
},
{
"epoch": 419.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9902234077453613,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 15922
},
{
"epoch": 420.0,
"learning_rate": 1.6e-07,
"loss": 3.5089,
"step": 15960
},
{
"epoch": 420.0,
"eval_accuracy": 0.4215542521994135,
"eval_loss": 3.989504814147949,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 15960
},
{
"epoch": 421.0,
"learning_rate": 1.5799999999999999e-07,
"loss": 3.5091,
"step": 15998
},
{
"epoch": 421.0,
"eval_accuracy": 0.42130987292277616,
"eval_loss": 3.9893267154693604,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 15998
},
{
"epoch": 422.0,
"learning_rate": 1.56e-07,
"loss": 3.5101,
"step": 16036
},
{
"epoch": 422.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9890270233154297,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 16036
},
{
"epoch": 423.0,
"learning_rate": 1.54e-07,
"loss": 3.5061,
"step": 16074
},
{
"epoch": 423.0,
"eval_accuracy": 0.4220430107526882,
"eval_loss": 3.990032434463501,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 16074
},
{
"epoch": 424.0,
"learning_rate": 1.5199999999999998e-07,
"loss": 3.5048,
"step": 16112
},
{
"epoch": 424.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9888319969177246,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 16112
},
{
"epoch": 425.0,
"learning_rate": 1.5e-07,
"loss": 3.501,
"step": 16150
},
{
"epoch": 425.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.9880638122558594,
"eval_runtime": 0.6256,
"eval_samples_per_second": 6.394,
"eval_steps_per_second": 1.599,
"step": 16150
},
{
"epoch": 426.0,
"learning_rate": 1.4799999999999998e-07,
"loss": 3.5067,
"step": 16188
},
{
"epoch": 426.0,
"eval_accuracy": 0.42179863147605084,
"eval_loss": 3.987746000289917,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 16188
},
{
"epoch": 427.0,
"learning_rate": 1.4599999999999998e-07,
"loss": 3.5037,
"step": 16226
},
{
"epoch": 427.0,
"eval_accuracy": 0.4222873900293255,
"eval_loss": 3.986624002456665,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 16226
},
{
"epoch": 428.0,
"learning_rate": 1.44e-07,
"loss": 3.5052,
"step": 16264
},
{
"epoch": 428.0,
"eval_accuracy": 0.4222873900293255,
"eval_loss": 3.985456943511963,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 16264
},
{
"epoch": 429.0,
"learning_rate": 1.4199999999999997e-07,
"loss": 3.5049,
"step": 16302
},
{
"epoch": 429.0,
"eval_accuracy": 0.4222873900293255,
"eval_loss": 3.9861788749694824,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 16302
},
{
"epoch": 430.0,
"learning_rate": 1.4e-07,
"loss": 3.5017,
"step": 16340
},
{
"epoch": 430.0,
"eval_accuracy": 0.4227761485826002,
"eval_loss": 3.987318992614746,
"eval_runtime": 0.6129,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 16340
},
{
"epoch": 431.0,
"learning_rate": 1.3800000000000002e-07,
"loss": 3.5038,
"step": 16378
},
{
"epoch": 431.0,
"eval_accuracy": 0.4227761485826002,
"eval_loss": 3.9872233867645264,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 16378
},
{
"epoch": 432.0,
"learning_rate": 1.36e-07,
"loss": 3.5072,
"step": 16416
},
{
"epoch": 432.0,
"eval_accuracy": 0.42253176930596287,
"eval_loss": 3.985309362411499,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 16416
},
{
"epoch": 433.0,
"learning_rate": 1.34e-07,
"loss": 3.5009,
"step": 16454
},
{
"epoch": 433.0,
"eval_accuracy": 0.42253176930596287,
"eval_loss": 3.9849016666412354,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.628,
"step": 16454
},
{
"epoch": 434.0,
"learning_rate": 1.32e-07,
"loss": 3.5023,
"step": 16492
},
{
"epoch": 434.0,
"eval_accuracy": 0.4227761485826002,
"eval_loss": 3.9856038093566895,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 16492
},
{
"epoch": 435.0,
"learning_rate": 1.3e-07,
"loss": 3.4982,
"step": 16530
},
{
"epoch": 435.0,
"eval_accuracy": 0.4227761485826002,
"eval_loss": 3.9859957695007324,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 16530
},
{
"epoch": 436.0,
"learning_rate": 1.28e-07,
"loss": 3.4927,
"step": 16568
},
{
"epoch": 436.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.9858930110931396,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.627,
"step": 16568
},
{
"epoch": 437.0,
"learning_rate": 1.26e-07,
"loss": 3.4959,
"step": 16606
},
{
"epoch": 437.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.986088514328003,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 16606
},
{
"epoch": 438.0,
"learning_rate": 1.24e-07,
"loss": 3.4984,
"step": 16644
},
{
"epoch": 438.0,
"eval_accuracy": 0.4227761485826002,
"eval_loss": 3.9860339164733887,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 16644
},
{
"epoch": 439.0,
"learning_rate": 1.2199999999999998e-07,
"loss": 3.5005,
"step": 16682
},
{
"epoch": 439.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.9846749305725098,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 16682
},
{
"epoch": 440.0,
"learning_rate": 1.2e-07,
"loss": 3.4947,
"step": 16720
},
{
"epoch": 440.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.9845149517059326,
"eval_runtime": 0.6202,
"eval_samples_per_second": 6.45,
"eval_steps_per_second": 1.612,
"step": 16720
},
{
"epoch": 441.0,
"learning_rate": 1.1799999999999998e-07,
"loss": 3.4964,
"step": 16758
},
{
"epoch": 441.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.9842681884765625,
"eval_runtime": 0.6165,
"eval_samples_per_second": 6.488,
"eval_steps_per_second": 1.622,
"step": 16758
},
{
"epoch": 442.0,
"learning_rate": 1.16e-07,
"loss": 3.4955,
"step": 16796
},
{
"epoch": 442.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9844443798065186,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 16796
},
{
"epoch": 443.0,
"learning_rate": 1.14e-07,
"loss": 3.4923,
"step": 16834
},
{
"epoch": 443.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9843380451202393,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 16834
},
{
"epoch": 444.0,
"learning_rate": 1.12e-07,
"loss": 3.4993,
"step": 16872
},
{
"epoch": 444.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.9841716289520264,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 16872
},
{
"epoch": 445.0,
"learning_rate": 1.0999999999999999e-07,
"loss": 3.4889,
"step": 16910
},
{
"epoch": 445.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9846384525299072,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 16910
},
{
"epoch": 446.0,
"learning_rate": 1.0799999999999999e-07,
"loss": 3.487,
"step": 16948
},
{
"epoch": 446.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.98549485206604,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 16948
},
{
"epoch": 447.0,
"learning_rate": 1.06e-07,
"loss": 3.4965,
"step": 16986
},
{
"epoch": 447.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.985051155090332,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 16986
},
{
"epoch": 448.0,
"learning_rate": 1.0399999999999999e-07,
"loss": 3.4873,
"step": 17024
},
{
"epoch": 448.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9851512908935547,
"eval_runtime": 0.6232,
"eval_samples_per_second": 6.419,
"eval_steps_per_second": 1.605,
"step": 17024
},
{
"epoch": 449.0,
"learning_rate": 1.0199999999999999e-07,
"loss": 3.4936,
"step": 17062
},
{
"epoch": 449.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.984747886657715,
"eval_runtime": 0.6132,
"eval_samples_per_second": 6.523,
"eval_steps_per_second": 1.631,
"step": 17062
},
{
"epoch": 450.0,
"learning_rate": 1e-07,
"loss": 3.494,
"step": 17100
},
{
"epoch": 450.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.984121084213257,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 17100
},
{
"epoch": 451.0,
"learning_rate": 9.8e-08,
"loss": 3.4855,
"step": 17138
},
{
"epoch": 451.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9835801124572754,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 17138
},
{
"epoch": 452.0,
"learning_rate": 9.6e-08,
"loss": 3.4898,
"step": 17176
},
{
"epoch": 452.0,
"eval_accuracy": 0.42302052785923755,
"eval_loss": 3.983008623123169,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 17176
},
{
"epoch": 453.0,
"learning_rate": 9.4e-08,
"loss": 3.4866,
"step": 17214
},
{
"epoch": 453.0,
"eval_accuracy": 0.4232649071358749,
"eval_loss": 3.9831044673919678,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 17214
},
{
"epoch": 454.0,
"learning_rate": 9.199999999999999e-08,
"loss": 3.4866,
"step": 17252
},
{
"epoch": 454.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.983053207397461,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 17252
},
{
"epoch": 455.0,
"learning_rate": 9e-08,
"loss": 3.4886,
"step": 17290
},
{
"epoch": 455.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9836299419403076,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 17290
},
{
"epoch": 456.0,
"learning_rate": 8.8e-08,
"loss": 3.4874,
"step": 17328
},
{
"epoch": 456.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.983760118484497,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 17328
},
{
"epoch": 457.0,
"learning_rate": 8.599999999999999e-08,
"loss": 3.486,
"step": 17366
},
{
"epoch": 457.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.983823776245117,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 17366
},
{
"epoch": 458.0,
"learning_rate": 8.4e-08,
"loss": 3.4869,
"step": 17404
},
{
"epoch": 458.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.983541488647461,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 17404
},
{
"epoch": 459.0,
"learning_rate": 8.2e-08,
"loss": 3.4845,
"step": 17442
},
{
"epoch": 459.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9833405017852783,
"eval_runtime": 0.6146,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 1.627,
"step": 17442
},
{
"epoch": 460.0,
"learning_rate": 8e-08,
"loss": 3.4849,
"step": 17480
},
{
"epoch": 460.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9825379848480225,
"eval_runtime": 0.6139,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 17480
},
{
"epoch": 461.0,
"learning_rate": 7.8e-08,
"loss": 3.4841,
"step": 17518
},
{
"epoch": 461.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.981783628463745,
"eval_runtime": 0.6149,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 1.626,
"step": 17518
},
{
"epoch": 462.0,
"learning_rate": 7.599999999999999e-08,
"loss": 3.4924,
"step": 17556
},
{
"epoch": 462.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.9813952445983887,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.512,
"eval_steps_per_second": 1.628,
"step": 17556
},
{
"epoch": 463.0,
"learning_rate": 7.399999999999999e-08,
"loss": 3.571,
"step": 17594
},
{
"epoch": 463.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.981501579284668,
"eval_runtime": 0.6142,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 17594
},
{
"epoch": 464.0,
"learning_rate": 7.2e-08,
"loss": 3.4811,
"step": 17632
},
{
"epoch": 464.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.981280565261841,
"eval_runtime": 0.6153,
"eval_samples_per_second": 6.501,
"eval_steps_per_second": 1.625,
"step": 17632
},
{
"epoch": 465.0,
"learning_rate": 7e-08,
"loss": 3.4851,
"step": 17670
},
{
"epoch": 465.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.9809834957122803,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 17670
},
{
"epoch": 466.0,
"learning_rate": 6.8e-08,
"loss": 3.4776,
"step": 17708
},
{
"epoch": 466.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9812755584716797,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 17708
},
{
"epoch": 467.0,
"learning_rate": 6.6e-08,
"loss": 3.4849,
"step": 17746
},
{
"epoch": 467.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.981030225753784,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.514,
"eval_steps_per_second": 1.629,
"step": 17746
},
{
"epoch": 468.0,
"learning_rate": 6.4e-08,
"loss": 3.4766,
"step": 17784
},
{
"epoch": 468.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9813342094421387,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 17784
},
{
"epoch": 469.0,
"learning_rate": 6.2e-08,
"loss": 3.4791,
"step": 17822
},
{
"epoch": 469.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.981501817703247,
"eval_runtime": 0.6135,
"eval_samples_per_second": 6.52,
"eval_steps_per_second": 1.63,
"step": 17822
},
{
"epoch": 470.0,
"learning_rate": 6e-08,
"loss": 3.4814,
"step": 17860
},
{
"epoch": 470.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.98130464553833,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.627,
"step": 17860
},
{
"epoch": 471.0,
"learning_rate": 5.8e-08,
"loss": 3.4861,
"step": 17898
},
{
"epoch": 471.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.980907917022705,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 17898
},
{
"epoch": 472.0,
"learning_rate": 5.6e-08,
"loss": 3.4861,
"step": 17936
},
{
"epoch": 472.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.9806013107299805,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 17936
},
{
"epoch": 473.0,
"learning_rate": 5.3999999999999994e-08,
"loss": 3.4825,
"step": 17974
},
{
"epoch": 473.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.9808974266052246,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 17974
},
{
"epoch": 474.0,
"learning_rate": 5.1999999999999996e-08,
"loss": 3.4758,
"step": 18012
},
{
"epoch": 474.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.98111629486084,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 18012
},
{
"epoch": 475.0,
"learning_rate": 5e-08,
"loss": 3.4811,
"step": 18050
},
{
"epoch": 475.0,
"eval_accuracy": 0.42350928641251223,
"eval_loss": 3.980703592300415,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.629,
"step": 18050
},
{
"epoch": 476.0,
"learning_rate": 4.8e-08,
"loss": 3.4831,
"step": 18088
},
{
"epoch": 476.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.980832815170288,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 18088
},
{
"epoch": 477.0,
"learning_rate": 4.5999999999999995e-08,
"loss": 3.4837,
"step": 18126
},
{
"epoch": 477.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.980334758758545,
"eval_runtime": 0.613,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 18126
},
{
"epoch": 478.0,
"learning_rate": 4.4e-08,
"loss": 3.4843,
"step": 18164
},
{
"epoch": 478.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9802615642547607,
"eval_runtime": 0.6137,
"eval_samples_per_second": 6.518,
"eval_steps_per_second": 1.63,
"step": 18164
},
{
"epoch": 479.0,
"learning_rate": 4.2e-08,
"loss": 3.4825,
"step": 18202
},
{
"epoch": 479.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9801557064056396,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 18202
},
{
"epoch": 480.0,
"learning_rate": 4e-08,
"loss": 3.4807,
"step": 18240
},
{
"epoch": 480.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.979966163635254,
"eval_runtime": 0.6189,
"eval_samples_per_second": 6.463,
"eval_steps_per_second": 1.616,
"step": 18240
},
{
"epoch": 481.0,
"learning_rate": 3.7999999999999996e-08,
"loss": 3.4808,
"step": 18278
},
{
"epoch": 481.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9796664714813232,
"eval_runtime": 0.618,
"eval_samples_per_second": 6.473,
"eval_steps_per_second": 1.618,
"step": 18278
},
{
"epoch": 482.0,
"learning_rate": 3.6e-08,
"loss": 3.4805,
"step": 18316
},
{
"epoch": 482.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9796643257141113,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 18316
},
{
"epoch": 483.0,
"learning_rate": 3.4e-08,
"loss": 3.4818,
"step": 18354
},
{
"epoch": 483.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.979565382003784,
"eval_runtime": 0.6125,
"eval_samples_per_second": 6.531,
"eval_steps_per_second": 1.633,
"step": 18354
},
{
"epoch": 484.0,
"learning_rate": 3.2e-08,
"loss": 3.4821,
"step": 18392
},
{
"epoch": 484.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9793689250946045,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 1.629,
"step": 18392
},
{
"epoch": 485.0,
"learning_rate": 3e-08,
"loss": 3.4802,
"step": 18430
},
{
"epoch": 485.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.979444980621338,
"eval_runtime": 0.6144,
"eval_samples_per_second": 6.51,
"eval_steps_per_second": 1.628,
"step": 18430
},
{
"epoch": 486.0,
"learning_rate": 2.8e-08,
"loss": 3.4805,
"step": 18468
},
{
"epoch": 486.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.979588508605957,
"eval_runtime": 0.6134,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 18468
},
{
"epoch": 487.0,
"learning_rate": 2.5999999999999998e-08,
"loss": 3.4831,
"step": 18506
},
{
"epoch": 487.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9796085357666016,
"eval_runtime": 0.6257,
"eval_samples_per_second": 6.393,
"eval_steps_per_second": 1.598,
"step": 18506
},
{
"epoch": 488.0,
"learning_rate": 2.4e-08,
"loss": 3.4846,
"step": 18544
},
{
"epoch": 488.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.97976016998291,
"eval_runtime": 0.6147,
"eval_samples_per_second": 6.507,
"eval_steps_per_second": 1.627,
"step": 18544
},
{
"epoch": 489.0,
"learning_rate": 2.2e-08,
"loss": 3.4824,
"step": 18582
},
{
"epoch": 489.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9797983169555664,
"eval_runtime": 0.6255,
"eval_samples_per_second": 6.395,
"eval_steps_per_second": 1.599,
"step": 18582
},
{
"epoch": 490.0,
"learning_rate": 2e-08,
"loss": 3.4807,
"step": 18620
},
{
"epoch": 490.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9798743724823,
"eval_runtime": 0.6145,
"eval_samples_per_second": 6.509,
"eval_steps_per_second": 1.627,
"step": 18620
},
{
"epoch": 491.0,
"learning_rate": 1.8e-08,
"loss": 3.4809,
"step": 18658
},
{
"epoch": 491.0,
"eval_accuracy": 0.4239980449657869,
"eval_loss": 3.9799368381500244,
"eval_runtime": 0.6143,
"eval_samples_per_second": 6.511,
"eval_steps_per_second": 1.628,
"step": 18658
},
{
"epoch": 492.0,
"learning_rate": 1.6e-08,
"loss": 3.4801,
"step": 18696
},
{
"epoch": 492.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9799206256866455,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 18696
},
{
"epoch": 493.0,
"learning_rate": 1.4e-08,
"loss": 3.479,
"step": 18734
},
{
"epoch": 493.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9799153804779053,
"eval_runtime": 0.6136,
"eval_samples_per_second": 6.519,
"eval_steps_per_second": 1.63,
"step": 18734
},
{
"epoch": 494.0,
"learning_rate": 1.2e-08,
"loss": 3.48,
"step": 18772
},
{
"epoch": 494.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9799201488494873,
"eval_runtime": 0.6126,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 18772
},
{
"epoch": 495.0,
"learning_rate": 1e-08,
"loss": 3.4828,
"step": 18810
},
{
"epoch": 495.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9799094200134277,
"eval_runtime": 0.6243,
"eval_samples_per_second": 6.407,
"eval_steps_per_second": 1.602,
"step": 18810
},
{
"epoch": 496.0,
"learning_rate": 8e-09,
"loss": 3.4812,
"step": 18848
},
{
"epoch": 496.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.979907512664795,
"eval_runtime": 0.6138,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 1.629,
"step": 18848
},
{
"epoch": 497.0,
"learning_rate": 6e-09,
"loss": 3.4798,
"step": 18886
},
{
"epoch": 497.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.9798967838287354,
"eval_runtime": 0.6148,
"eval_samples_per_second": 6.506,
"eval_steps_per_second": 1.626,
"step": 18886
},
{
"epoch": 498.0,
"learning_rate": 4e-09,
"loss": 3.4866,
"step": 18924
},
{
"epoch": 498.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.979888677597046,
"eval_runtime": 0.614,
"eval_samples_per_second": 6.515,
"eval_steps_per_second": 1.629,
"step": 18924
},
{
"epoch": 499.0,
"learning_rate": 2e-09,
"loss": 3.4785,
"step": 18962
},
{
"epoch": 499.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.979886054992676,
"eval_runtime": 0.6141,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 1.628,
"step": 18962
},
{
"epoch": 500.0,
"learning_rate": 0.0,
"loss": 3.4893,
"step": 19000
},
{
"epoch": 500.0,
"eval_accuracy": 0.4237536656891496,
"eval_loss": 3.979886293411255,
"eval_runtime": 0.6133,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 19000
},
{
"epoch": 500.0,
"step": 19000,
"total_flos": 8.26946617344e+16,
"train_loss": 3.948820646587171,
"train_runtime": 16522.6397,
"train_samples_per_second": 1.15,
"train_steps_per_second": 1.15
}
],
"logging_steps": 500,
"max_steps": 19000,
"num_input_tokens_seen": 0,
"num_train_epochs": 500,
"save_steps": 500,
"total_flos": 8.26946617344e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}