|
{ |
|
"best_metric": 3.9793689250946045, |
|
"best_model_checkpoint": "output_hemo_neg_3/checkpoint-18392", |
|
"epoch": 500.0, |
|
"eval_steps": 500, |
|
"global_step": 19000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.98e-07, |
|
"loss": 5.9415, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.30840664711632454, |
|
"eval_loss": 5.606841087341309, |
|
"eval_runtime": 0.5994, |
|
"eval_samples_per_second": 6.673, |
|
"eval_steps_per_second": 1.668, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.959999999999999e-07, |
|
"loss": 5.7302, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.32038123167155425, |
|
"eval_loss": 5.426336288452148, |
|
"eval_runtime": 0.6035, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 1.657, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.94e-07, |
|
"loss": 5.5675, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.323069403714565, |
|
"eval_loss": 5.287517070770264, |
|
"eval_runtime": 0.6089, |
|
"eval_samples_per_second": 6.57, |
|
"eval_steps_per_second": 1.642, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.92e-07, |
|
"loss": 5.4594, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3250244379276637, |
|
"eval_loss": 5.205501556396484, |
|
"eval_runtime": 0.6097, |
|
"eval_samples_per_second": 6.56, |
|
"eval_steps_per_second": 1.64, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.9e-07, |
|
"loss": 5.3808, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3296676441837732, |
|
"eval_loss": 5.158883094787598, |
|
"eval_runtime": 0.6099, |
|
"eval_samples_per_second": 6.558, |
|
"eval_steps_per_second": 1.639, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.88e-07, |
|
"loss": 5.3353, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3321114369501466, |
|
"eval_loss": 5.119546413421631, |
|
"eval_runtime": 0.6108, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 1.637, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.86e-07, |
|
"loss": 5.2946, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.333822091886608, |
|
"eval_loss": 5.077916622161865, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.84e-07, |
|
"loss": 5.2632, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.33699902248289343, |
|
"eval_loss": 5.043184280395508, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 9.819999999999999e-07, |
|
"loss": 5.2279, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.33724340175953077, |
|
"eval_loss": 5.015382766723633, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.634, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.8e-07, |
|
"loss": 5.1999, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.33773216031280545, |
|
"eval_loss": 4.993128299713135, |
|
"eval_runtime": 0.612, |
|
"eval_samples_per_second": 6.535, |
|
"eval_steps_per_second": 1.634, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.78e-07, |
|
"loss": 5.1853, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.3399315738025415, |
|
"eval_loss": 4.970090389251709, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 9.759999999999998e-07, |
|
"loss": 5.1619, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3428641251221896, |
|
"eval_loss": 4.94577693939209, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.634, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 9.74e-07, |
|
"loss": 5.1395, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.34384164222873903, |
|
"eval_loss": 4.927363395690918, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 9.72e-07, |
|
"loss": 5.1179, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.34628543499511244, |
|
"eval_loss": 4.908015251159668, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 9.7e-07, |
|
"loss": 5.1048, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.3465298142717498, |
|
"eval_loss": 4.892131805419922, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 9.679999999999999e-07, |
|
"loss": 5.0837, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.34701857282502446, |
|
"eval_loss": 4.875644683837891, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 9.66e-07, |
|
"loss": 5.067, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.34921798631476053, |
|
"eval_loss": 4.860612869262695, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 9.64e-07, |
|
"loss": 5.0516, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.3506842619745846, |
|
"eval_loss": 4.846870422363281, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 9.619999999999999e-07, |
|
"loss": 5.0313, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.3521505376344086, |
|
"eval_loss": 4.836608409881592, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 9.6e-07, |
|
"loss": 5.0225, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.3526392961876833, |
|
"eval_loss": 4.827553749084473, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 9.58e-07, |
|
"loss": 5.0068, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.3521505376344086, |
|
"eval_loss": 4.817898273468018, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.633, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 9.559999999999998e-07, |
|
"loss": 4.9942, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.3521505376344086, |
|
"eval_loss": 4.805068016052246, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 9.539999999999999e-07, |
|
"loss": 4.9758, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.3526392961876833, |
|
"eval_loss": 4.796260356903076, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 9.52e-07, |
|
"loss": 4.9605, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.35288367546432065, |
|
"eval_loss": 4.78426456451416, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.633, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 9.499999999999999e-07, |
|
"loss": 4.9525, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.353128054740958, |
|
"eval_loss": 4.772826671600342, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 9.479999999999999e-07, |
|
"loss": 4.9409, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.35239491691104596, |
|
"eval_loss": 4.761840343475342, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 9.459999999999999e-07, |
|
"loss": 4.9328, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.3519061583577713, |
|
"eval_loss": 4.75234317779541, |
|
"eval_runtime": 0.6227, |
|
"eval_samples_per_second": 6.424, |
|
"eval_steps_per_second": 1.606, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 9.439999999999999e-07, |
|
"loss": 4.9168, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.3526392961876833, |
|
"eval_loss": 4.744428634643555, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 9.419999999999999e-07, |
|
"loss": 4.9057, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.3550830889540567, |
|
"eval_loss": 4.733183860778809, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 9.399999999999999e-07, |
|
"loss": 4.8896, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.3560606060606061, |
|
"eval_loss": 4.723690986633301, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 9.379999999999998e-07, |
|
"loss": 4.8869, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.35654936461388076, |
|
"eval_loss": 4.715620994567871, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 9.36e-07, |
|
"loss": 4.8798, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.3567937438905181, |
|
"eval_loss": 4.709283828735352, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 9.34e-07, |
|
"loss": 4.8591, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.3575268817204301, |
|
"eval_loss": 4.702914714813232, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 9.32e-07, |
|
"loss": 4.8548, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.35703812316715544, |
|
"eval_loss": 4.694584369659424, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 9.3e-07, |
|
"loss": 4.8502, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.35948191593352885, |
|
"eval_loss": 4.687127590179443, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 9.28e-07, |
|
"loss": 4.8378, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.35948191593352885, |
|
"eval_loss": 4.680301189422607, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 9.26e-07, |
|
"loss": 4.829, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.35997067448680353, |
|
"eval_loss": 4.673268795013428, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 9.24e-07, |
|
"loss": 4.8177, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.3602150537634409, |
|
"eval_loss": 4.66432523727417, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 9.22e-07, |
|
"loss": 4.809, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.36070381231671556, |
|
"eval_loss": 4.6591081619262695, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 9.2e-07, |
|
"loss": 4.8002, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.36070381231671556, |
|
"eval_loss": 4.650698661804199, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 9.18e-07, |
|
"loss": 4.7938, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.3614369501466276, |
|
"eval_loss": 4.643824577331543, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 9.16e-07, |
|
"loss": 4.7787, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.3616813294232649, |
|
"eval_loss": 4.636685848236084, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 9.14e-07, |
|
"loss": 4.7685, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.3629032258064516, |
|
"eval_loss": 4.630648136138916, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 9.12e-07, |
|
"loss": 4.762, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 4.621088981628418, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 9.1e-07, |
|
"loss": 4.7487, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.36412512218963833, |
|
"eval_loss": 4.61327600479126, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 9.08e-07, |
|
"loss": 4.7451, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.364613880742913, |
|
"eval_loss": 4.605830669403076, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 9.06e-07, |
|
"loss": 4.7378, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.3658357771260997, |
|
"eval_loss": 4.600909233093262, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 9.039999999999999e-07, |
|
"loss": 4.7281, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.3658357771260997, |
|
"eval_loss": 4.5931782722473145, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 9.02e-07, |
|
"loss": 4.7196, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.3655913978494624, |
|
"eval_loss": 4.5888590812683105, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 9e-07, |
|
"loss": 4.7091, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.36656891495601174, |
|
"eval_loss": 4.581442356109619, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 8.98e-07, |
|
"loss": 4.7032, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.3668132942326491, |
|
"eval_loss": 4.5762939453125, |
|
"eval_runtime": 0.6259, |
|
"eval_samples_per_second": 6.391, |
|
"eval_steps_per_second": 1.598, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 8.96e-07, |
|
"loss": 4.6978, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.3668132942326491, |
|
"eval_loss": 4.573066711425781, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.633, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 8.939999999999999e-07, |
|
"loss": 4.6908, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.36730205278592376, |
|
"eval_loss": 4.5681657791137695, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 8.92e-07, |
|
"loss": 4.6776, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.36730205278592376, |
|
"eval_loss": 4.56380558013916, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 8.9e-07, |
|
"loss": 4.6667, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.3680351906158358, |
|
"eval_loss": 4.558794975280762, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 8.88e-07, |
|
"loss": 4.6662, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.36852394916911047, |
|
"eval_loss": 4.5535197257995605, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 8.86e-07, |
|
"loss": 4.6567, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.36974584555229717, |
|
"eval_loss": 4.549376964569092, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 8.839999999999999e-07, |
|
"loss": 4.6492, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.36974584555229717, |
|
"eval_loss": 4.543338298797607, |
|
"eval_runtime": 0.6159, |
|
"eval_samples_per_second": 6.494, |
|
"eval_steps_per_second": 1.624, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 8.82e-07, |
|
"loss": 4.6442, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.36974584555229717, |
|
"eval_loss": 4.5420732498168945, |
|
"eval_runtime": 0.6226, |
|
"eval_samples_per_second": 6.424, |
|
"eval_steps_per_second": 1.606, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 8.799999999999999e-07, |
|
"loss": 4.632, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.3699902248289345, |
|
"eval_loss": 4.5368475914001465, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 6.504, |
|
"eval_steps_per_second": 1.626, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 8.78e-07, |
|
"loss": 4.6256, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.3704789833822092, |
|
"eval_loss": 4.532083511352539, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 8.76e-07, |
|
"loss": 4.6215, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.3699902248289345, |
|
"eval_loss": 4.528621673583984, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 8.739999999999999e-07, |
|
"loss": 4.6142, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.37023460410557185, |
|
"eval_loss": 4.524003982543945, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 8.72e-07, |
|
"loss": 4.6041, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3709677419354839, |
|
"eval_loss": 4.519542694091797, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 8.699999999999999e-07, |
|
"loss": 4.5984, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.37145650048875856, |
|
"eval_loss": 4.514742851257324, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 8.68e-07, |
|
"loss": 4.5919, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.37267839687194526, |
|
"eval_loss": 4.511608600616455, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 8.659999999999999e-07, |
|
"loss": 4.5838, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.3724340175953079, |
|
"eval_loss": 4.5069780349731445, |
|
"eval_runtime": 0.6152, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 1.626, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 8.639999999999999e-07, |
|
"loss": 4.5733, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.3724340175953079, |
|
"eval_loss": 4.503517150878906, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 8.62e-07, |
|
"loss": 4.5642, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.3721896383186706, |
|
"eval_loss": 4.500667095184326, |
|
"eval_runtime": 0.6248, |
|
"eval_samples_per_second": 6.402, |
|
"eval_steps_per_second": 1.6, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 8.599999999999999e-07, |
|
"loss": 4.5607, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.37194525904203324, |
|
"eval_loss": 4.4967780113220215, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 8.58e-07, |
|
"loss": 4.5543, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.3729227761485826, |
|
"eval_loss": 4.492751121520996, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 8.559999999999999e-07, |
|
"loss": 4.5502, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.3729227761485826, |
|
"eval_loss": 4.489741325378418, |
|
"eval_runtime": 0.6243, |
|
"eval_samples_per_second": 6.407, |
|
"eval_steps_per_second": 1.602, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 8.539999999999999e-07, |
|
"loss": 4.5505, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.3736559139784946, |
|
"eval_loss": 4.487486839294434, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 8.52e-07, |
|
"loss": 4.537, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.37316715542521994, |
|
"eval_loss": 4.483956813812256, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 8.499999999999999e-07, |
|
"loss": 4.529, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.374633431085044, |
|
"eval_loss": 4.480215549468994, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 8.48e-07, |
|
"loss": 4.5201, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.37487781036168133, |
|
"eval_loss": 4.4763689041137695, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 8.459999999999999e-07, |
|
"loss": 4.5176, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.472899436950684, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 8.439999999999999e-07, |
|
"loss": 4.5087, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.4715986251831055, |
|
"eval_runtime": 0.627, |
|
"eval_samples_per_second": 6.379, |
|
"eval_steps_per_second": 1.595, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 8.419999999999999e-07, |
|
"loss": 4.504, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.37438905180840665, |
|
"eval_loss": 4.468360900878906, |
|
"eval_runtime": 0.6153, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 1.625, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 8.399999999999999e-07, |
|
"loss": 4.4914, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.463363170623779, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 8.38e-07, |
|
"loss": 4.4907, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.461572170257568, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 8.359999999999999e-07, |
|
"loss": 4.483, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.375366568914956, |
|
"eval_loss": 4.45780086517334, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 8.34e-07, |
|
"loss": 4.4792, |
|
"step": 3154 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.3741446725317693, |
|
"eval_loss": 4.454073429107666, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 3154 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 8.319999999999999e-07, |
|
"loss": 4.4705, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.37438905180840665, |
|
"eval_loss": 4.451131820678711, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 8.299999999999999e-07, |
|
"loss": 4.4647, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.37487781036168133, |
|
"eval_loss": 4.448835372924805, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 8.28e-07, |
|
"loss": 4.4617, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.444460391998291, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 8.259999999999999e-07, |
|
"loss": 4.453, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.37512218963831867, |
|
"eval_loss": 4.438481330871582, |
|
"eval_runtime": 0.6245, |
|
"eval_samples_per_second": 6.405, |
|
"eval_steps_per_second": 1.601, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 8.24e-07, |
|
"loss": 4.4488, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.3763440860215054, |
|
"eval_loss": 4.435319423675537, |
|
"eval_runtime": 0.6159, |
|
"eval_samples_per_second": 6.494, |
|
"eval_steps_per_second": 1.624, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 8.219999999999999e-07, |
|
"loss": 4.4424, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3765884652981427, |
|
"eval_loss": 4.432227611541748, |
|
"eval_runtime": 0.6169, |
|
"eval_samples_per_second": 6.484, |
|
"eval_steps_per_second": 1.621, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 8.199999999999999e-07, |
|
"loss": 4.433, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.3765884652981427, |
|
"eval_loss": 4.4299702644348145, |
|
"eval_runtime": 0.6255, |
|
"eval_samples_per_second": 6.395, |
|
"eval_steps_per_second": 1.599, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 8.179999999999999e-07, |
|
"loss": 4.4252, |
|
"step": 3458 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.3763440860215054, |
|
"eval_loss": 4.425891399383545, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 3458 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 8.159999999999999e-07, |
|
"loss": 4.4226, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.37732160312805474, |
|
"eval_loss": 4.421455383300781, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 8.14e-07, |
|
"loss": 4.4144, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.3770772238514174, |
|
"eval_loss": 4.41888427734375, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 8.12e-07, |
|
"loss": 4.4047, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.3770772238514174, |
|
"eval_loss": 4.416011333465576, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.633, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 8.1e-07, |
|
"loss": 4.4071, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.37732160312805474, |
|
"eval_loss": 4.413094997406006, |
|
"eval_runtime": 0.6249, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 1.6, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 8.08e-07, |
|
"loss": 4.3975, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.37732160312805474, |
|
"eval_loss": 4.409505367279053, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.532, |
|
"eval_steps_per_second": 1.633, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 8.06e-07, |
|
"loss": 4.3897, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.3770772238514174, |
|
"eval_loss": 4.408539772033691, |
|
"eval_runtime": 1.9183, |
|
"eval_samples_per_second": 2.085, |
|
"eval_steps_per_second": 0.521, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 8.04e-07, |
|
"loss": 4.3869, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.3770772238514174, |
|
"eval_loss": 4.405216693878174, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 8.02e-07, |
|
"loss": 4.3751, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.37732160312805474, |
|
"eval_loss": 4.402120113372803, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 8e-07, |
|
"loss": 4.3698, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.37683284457478006, |
|
"eval_loss": 4.398764610290527, |
|
"eval_runtime": 0.6238, |
|
"eval_samples_per_second": 6.412, |
|
"eval_steps_per_second": 1.603, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 7.98e-07, |
|
"loss": 4.368, |
|
"step": 3838 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.37683284457478006, |
|
"eval_loss": 4.394458293914795, |
|
"eval_runtime": 0.6261, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 1.597, |
|
"step": 3838 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 7.96e-07, |
|
"loss": 4.3643, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.3770772238514174, |
|
"eval_loss": 4.391842842102051, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 7.94e-07, |
|
"loss": 4.3552, |
|
"step": 3914 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.3765884652981427, |
|
"eval_loss": 4.389264106750488, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3914 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 7.92e-07, |
|
"loss": 4.3478, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.3775659824046921, |
|
"eval_loss": 4.386912822723389, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 7.9e-07, |
|
"loss": 4.3438, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.37805474095796676, |
|
"eval_loss": 4.3847856521606445, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 7.88e-07, |
|
"loss": 4.3362, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.37732160312805474, |
|
"eval_loss": 4.38198184967041, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 7.86e-07, |
|
"loss": 4.3356, |
|
"step": 4066 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.3778103616813294, |
|
"eval_loss": 4.37683629989624, |
|
"eval_runtime": 0.6124, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4066 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 7.84e-07, |
|
"loss": 4.3263, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.3775659824046921, |
|
"eval_loss": 4.376446723937988, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 7.82e-07, |
|
"loss": 4.3238, |
|
"step": 4142 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.3778103616813294, |
|
"eval_loss": 4.373225688934326, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4142 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 7.799999999999999e-07, |
|
"loss": 4.3157, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.37805474095796676, |
|
"eval_loss": 4.369943618774414, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 7.78e-07, |
|
"loss": 4.311, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.37805474095796676, |
|
"eval_loss": 4.367816925048828, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 7.76e-07, |
|
"loss": 4.3048, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.3787878787878788, |
|
"eval_loss": 4.364564895629883, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 7.74e-07, |
|
"loss": 4.2955, |
|
"step": 4294 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.37927663734115347, |
|
"eval_loss": 4.364035606384277, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4294 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 7.72e-07, |
|
"loss": 4.2914, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.37927663734115347, |
|
"eval_loss": 4.360426425933838, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 7.699999999999999e-07, |
|
"loss": 4.286, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.3790322580645161, |
|
"eval_loss": 4.3580002784729, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 7.68e-07, |
|
"loss": 4.2857, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.3790322580645161, |
|
"eval_loss": 4.354123115539551, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 7.66e-07, |
|
"loss": 4.2776, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.37927663734115347, |
|
"eval_loss": 4.352733612060547, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 7.64e-07, |
|
"loss": 4.2734, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.38025415444770283, |
|
"eval_loss": 4.348194599151611, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 7.62e-07, |
|
"loss": 4.2646, |
|
"step": 4522 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.3800097751710655, |
|
"eval_loss": 4.346100330352783, |
|
"eval_runtime": 0.7901, |
|
"eval_samples_per_second": 5.062, |
|
"eval_steps_per_second": 1.266, |
|
"step": 4522 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 7.599999999999999e-07, |
|
"loss": 4.2632, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.38025415444770283, |
|
"eval_loss": 4.3445892333984375, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 7.58e-07, |
|
"loss": 4.2586, |
|
"step": 4598 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.3807429130009775, |
|
"eval_loss": 4.340865135192871, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4598 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 7.559999999999999e-07, |
|
"loss": 4.2564, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.3812316715542522, |
|
"eval_loss": 4.3399505615234375, |
|
"eval_runtime": 0.6124, |
|
"eval_samples_per_second": 6.532, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 7.54e-07, |
|
"loss": 4.2423, |
|
"step": 4674 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.3807429130009775, |
|
"eval_loss": 4.335657596588135, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 4674 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 7.52e-07, |
|
"loss": 4.2425, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.3807429130009775, |
|
"eval_loss": 4.3334856033325195, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 7.5e-07, |
|
"loss": 4.2367, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.38098729227761485, |
|
"eval_loss": 4.330577373504639, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 7.48e-07, |
|
"loss": 4.2301, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.38147605083088953, |
|
"eval_loss": 4.3291544914245605, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 7.459999999999999e-07, |
|
"loss": 4.2286, |
|
"step": 4826 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.3812316715542522, |
|
"eval_loss": 4.327591419219971, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 4826 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 7.44e-07, |
|
"loss": 4.2184, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.38220918866080156, |
|
"eval_loss": 4.32462215423584, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 7.42e-07, |
|
"loss": 4.2156, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.38269794721407624, |
|
"eval_loss": 4.3210039138793945, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 7.4e-07, |
|
"loss": 4.2116, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.38343108504398826, |
|
"eval_loss": 4.318737506866455, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 7.38e-07, |
|
"loss": 4.2008, |
|
"step": 4978 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.38343108504398826, |
|
"eval_loss": 4.316496849060059, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 4978 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 7.359999999999999e-07, |
|
"loss": 4.1995, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.38343108504398826, |
|
"eval_loss": 4.3134074211120605, |
|
"eval_runtime": 0.6223, |
|
"eval_samples_per_second": 6.428, |
|
"eval_steps_per_second": 1.607, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 7.34e-07, |
|
"loss": 4.19, |
|
"step": 5054 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.3841642228739003, |
|
"eval_loss": 4.313587665557861, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 5054 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 7.319999999999999e-07, |
|
"loss": 4.1828, |
|
"step": 5092 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.3841642228739003, |
|
"eval_loss": 4.311624050140381, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 5092 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 7.3e-07, |
|
"loss": 4.1815, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.38465298142717497, |
|
"eval_loss": 4.306524276733398, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 7.28e-07, |
|
"loss": 4.1771, |
|
"step": 5168 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.38391984359726294, |
|
"eval_loss": 4.305095195770264, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 1.633, |
|
"step": 5168 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 7.259999999999999e-07, |
|
"loss": 4.1744, |
|
"step": 5206 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.38465298142717497, |
|
"eval_loss": 4.301632881164551, |
|
"eval_runtime": 1.3483, |
|
"eval_samples_per_second": 2.967, |
|
"eval_steps_per_second": 0.742, |
|
"step": 5206 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 7.24e-07, |
|
"loss": 4.1717, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.38465298142717497, |
|
"eval_loss": 4.297549247741699, |
|
"eval_runtime": 0.6216, |
|
"eval_samples_per_second": 6.435, |
|
"eval_steps_per_second": 1.609, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 7.219999999999999e-07, |
|
"loss": 4.1616, |
|
"step": 5282 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.38465298142717497, |
|
"eval_loss": 4.296638488769531, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 5282 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 7.2e-07, |
|
"loss": 4.1582, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.38465298142717497, |
|
"eval_loss": 4.29475212097168, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 7.179999999999999e-07, |
|
"loss": 4.1583, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.3848973607038123, |
|
"eval_loss": 4.293056488037109, |
|
"eval_runtime": 3.2385, |
|
"eval_samples_per_second": 1.235, |
|
"eval_steps_per_second": 0.309, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 7.159999999999999e-07, |
|
"loss": 4.148, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.385386119257087, |
|
"eval_loss": 4.289401054382324, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 7.14e-07, |
|
"loss": 4.1417, |
|
"step": 5434 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.3848973607038123, |
|
"eval_loss": 4.286114692687988, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 5434 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 7.119999999999999e-07, |
|
"loss": 4.1386, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.386119257086999, |
|
"eval_loss": 4.286536693572998, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 7.1e-07, |
|
"loss": 4.133, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.386119257086999, |
|
"eval_loss": 4.283446311950684, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 7.079999999999999e-07, |
|
"loss": 4.129, |
|
"step": 5548 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.38636363636363635, |
|
"eval_loss": 4.279318332672119, |
|
"eval_runtime": 0.6163, |
|
"eval_samples_per_second": 6.491, |
|
"eval_steps_per_second": 1.623, |
|
"step": 5548 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 7.059999999999999e-07, |
|
"loss": 4.12, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.386119257086999, |
|
"eval_loss": 4.278520584106445, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 7.04e-07, |
|
"loss": 4.1206, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.38636363636363635, |
|
"eval_loss": 4.274984836578369, |
|
"eval_runtime": 0.6214, |
|
"eval_samples_per_second": 6.437, |
|
"eval_steps_per_second": 1.609, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 7.019999999999999e-07, |
|
"loss": 4.1226, |
|
"step": 5662 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.3870967741935484, |
|
"eval_loss": 4.274369716644287, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 1.633, |
|
"step": 5662 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 7e-07, |
|
"loss": 4.1104, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.3866080156402737, |
|
"eval_loss": 4.272345066070557, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 6.979999999999999e-07, |
|
"loss": 4.1093, |
|
"step": 5738 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.3870967741935484, |
|
"eval_loss": 4.267661094665527, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 5738 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 6.959999999999999e-07, |
|
"loss": 4.0989, |
|
"step": 5776 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.38685239491691104, |
|
"eval_loss": 4.265379428863525, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 5776 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 6.939999999999999e-07, |
|
"loss": 4.1035, |
|
"step": 5814 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.3878299120234604, |
|
"eval_loss": 4.264577865600586, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 5814 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 6.919999999999999e-07, |
|
"loss": 4.0949, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.38807429130009774, |
|
"eval_loss": 4.263481616973877, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 6.9e-07, |
|
"loss": 4.0921, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.3883186705767351, |
|
"eval_loss": 4.260597229003906, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 6.879999999999999e-07, |
|
"loss": 4.0883, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.3885630498533724, |
|
"eval_loss": 4.256484508514404, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 6.86e-07, |
|
"loss": 4.0794, |
|
"step": 5966 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.38929618768328444, |
|
"eval_loss": 4.25582218170166, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 5966 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 6.84e-07, |
|
"loss": 4.0754, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.38880742913000976, |
|
"eval_loss": 4.2530412673950195, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 6.82e-07, |
|
"loss": 4.0756, |
|
"step": 6042 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.38929618768328444, |
|
"eval_loss": 4.249640464782715, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 6042 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 6.800000000000001e-07, |
|
"loss": 4.067, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.38880742913000976, |
|
"eval_loss": 4.250114917755127, |
|
"eval_runtime": 0.6115, |
|
"eval_samples_per_second": 6.541, |
|
"eval_steps_per_second": 1.635, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 6.78e-07, |
|
"loss": 4.0627, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.3890518084066471, |
|
"eval_loss": 4.24841833114624, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 6.76e-07, |
|
"loss": 4.0586, |
|
"step": 6156 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.3897849462365591, |
|
"eval_loss": 4.243945121765137, |
|
"eval_runtime": 0.6246, |
|
"eval_samples_per_second": 6.404, |
|
"eval_steps_per_second": 1.601, |
|
"step": 6156 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 6.74e-07, |
|
"loss": 4.0577, |
|
"step": 6194 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.38929618768328444, |
|
"eval_loss": 4.243143081665039, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 6194 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 6.72e-07, |
|
"loss": 4.055, |
|
"step": 6232 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.3895405669599218, |
|
"eval_loss": 4.239078044891357, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 6232 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 6.7e-07, |
|
"loss": 4.0419, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.3895405669599218, |
|
"eval_loss": 4.239559650421143, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 6.68e-07, |
|
"loss": 4.0411, |
|
"step": 6308 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.3902737047898338, |
|
"eval_loss": 4.236454486846924, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6308 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 6.66e-07, |
|
"loss": 4.0405, |
|
"step": 6346 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.3907624633431085, |
|
"eval_loss": 4.235616683959961, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 6346 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 6.64e-07, |
|
"loss": 4.0327, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.39051808406647115, |
|
"eval_loss": 4.234899044036865, |
|
"eval_runtime": 0.6121, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.634, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"learning_rate": 6.62e-07, |
|
"loss": 4.0262, |
|
"step": 6422 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_accuracy": 0.3912512218963832, |
|
"eval_loss": 4.231151580810547, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.626, |
|
"step": 6422 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 6.6e-07, |
|
"loss": 4.0252, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_accuracy": 0.3912512218963832, |
|
"eval_loss": 4.230025291442871, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"learning_rate": 6.58e-07, |
|
"loss": 4.0237, |
|
"step": 6498 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_accuracy": 0.3914956011730205, |
|
"eval_loss": 4.225388526916504, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6498 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 6.56e-07, |
|
"loss": 4.024, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.3919843597262952, |
|
"eval_loss": 4.224780082702637, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"learning_rate": 6.54e-07, |
|
"loss": 4.0137, |
|
"step": 6574 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_accuracy": 0.39222873900293254, |
|
"eval_loss": 4.221837997436523, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 6574 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"learning_rate": 6.52e-07, |
|
"loss": 4.0108, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_accuracy": 0.3927174975562072, |
|
"eval_loss": 4.222439765930176, |
|
"eval_runtime": 0.6168, |
|
"eval_samples_per_second": 6.485, |
|
"eval_steps_per_second": 1.621, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 6.5e-07, |
|
"loss": 4.0037, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_accuracy": 0.3939393939393939, |
|
"eval_loss": 4.219006538391113, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 6.48e-07, |
|
"loss": 4.0021, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.3936950146627566, |
|
"eval_loss": 4.218034267425537, |
|
"eval_runtime": 0.6156, |
|
"eval_samples_per_second": 6.498, |
|
"eval_steps_per_second": 1.625, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 6.46e-07, |
|
"loss": 3.9949, |
|
"step": 6726 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_accuracy": 0.39418377321603126, |
|
"eval_loss": 4.215020656585693, |
|
"eval_runtime": 0.6221, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 1.607, |
|
"step": 6726 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"learning_rate": 6.44e-07, |
|
"loss": 3.9957, |
|
"step": 6764 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_accuracy": 0.3939393939393939, |
|
"eval_loss": 4.213464260101318, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 6764 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"learning_rate": 6.42e-07, |
|
"loss": 3.9923, |
|
"step": 6802 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_accuracy": 0.39418377321603126, |
|
"eval_loss": 4.209378242492676, |
|
"eval_runtime": 0.6122, |
|
"eval_samples_per_second": 6.534, |
|
"eval_steps_per_second": 1.634, |
|
"step": 6802 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 6.4e-07, |
|
"loss": 3.9853, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.3949169110459433, |
|
"eval_loss": 4.209150314331055, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"learning_rate": 6.38e-07, |
|
"loss": 3.9779, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_accuracy": 0.3949169110459433, |
|
"eval_loss": 4.2085700035095215, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"learning_rate": 6.36e-07, |
|
"loss": 3.9826, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_accuracy": 0.39467253176930595, |
|
"eval_loss": 4.204543590545654, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.632, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"learning_rate": 6.34e-07, |
|
"loss": 3.9775, |
|
"step": 6954 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_accuracy": 0.3949169110459433, |
|
"eval_loss": 4.201192855834961, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 6954 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 6.319999999999999e-07, |
|
"loss": 3.9706, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.39613880742913, |
|
"eval_loss": 4.200508117675781, |
|
"eval_runtime": 0.6124, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 6.3e-07, |
|
"loss": 3.9672, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_accuracy": 0.3956500488758553, |
|
"eval_loss": 4.19916296005249, |
|
"eval_runtime": 0.6242, |
|
"eval_samples_per_second": 6.408, |
|
"eval_steps_per_second": 1.602, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"learning_rate": 6.28e-07, |
|
"loss": 3.9707, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_accuracy": 0.3966275659824047, |
|
"eval_loss": 4.196375370025635, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"learning_rate": 6.26e-07, |
|
"loss": 3.9585, |
|
"step": 7106 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_accuracy": 0.39711632453567935, |
|
"eval_loss": 4.195079326629639, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7106 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"learning_rate": 6.24e-07, |
|
"loss": 3.9552, |
|
"step": 7144 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.3966275659824047, |
|
"eval_loss": 4.192666530609131, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 7144 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"learning_rate": 6.219999999999999e-07, |
|
"loss": 3.9526, |
|
"step": 7182 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_accuracy": 0.3966275659824047, |
|
"eval_loss": 4.1922197341918945, |
|
"eval_runtime": 0.6118, |
|
"eval_samples_per_second": 6.538, |
|
"eval_steps_per_second": 1.635, |
|
"step": 7182 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 6.2e-07, |
|
"loss": 3.9514, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 4.18861722946167, |
|
"eval_runtime": 0.6118, |
|
"eval_samples_per_second": 6.538, |
|
"eval_steps_per_second": 1.635, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"learning_rate": 6.18e-07, |
|
"loss": 3.9464, |
|
"step": 7258 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_accuracy": 0.39760508308895404, |
|
"eval_loss": 4.188557147979736, |
|
"eval_runtime": 0.667, |
|
"eval_samples_per_second": 5.997, |
|
"eval_steps_per_second": 1.499, |
|
"step": 7258 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 6.16e-07, |
|
"loss": 3.9433, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.3980938416422287, |
|
"eval_loss": 4.185554504394531, |
|
"eval_runtime": 0.6187, |
|
"eval_samples_per_second": 6.466, |
|
"eval_steps_per_second": 1.616, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"learning_rate": 6.14e-07, |
|
"loss": 3.9378, |
|
"step": 7334 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 4.184579372406006, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7334 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"learning_rate": 6.119999999999999e-07, |
|
"loss": 3.9362, |
|
"step": 7372 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_accuracy": 0.3980938416422287, |
|
"eval_loss": 4.1830949783325195, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7372 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 6.1e-07, |
|
"loss": 3.9307, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_accuracy": 0.3980938416422287, |
|
"eval_loss": 4.182034969329834, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"learning_rate": 6.079999999999999e-07, |
|
"loss": 3.9324, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 4.176692485809326, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"learning_rate": 6.06e-07, |
|
"loss": 3.9223, |
|
"step": 7486 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_accuracy": 0.39833822091886606, |
|
"eval_loss": 4.179370403289795, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7486 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"learning_rate": 6.04e-07, |
|
"loss": 3.9279, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_accuracy": 0.3985826001955034, |
|
"eval_loss": 4.1752119064331055, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"learning_rate": 6.019999999999999e-07, |
|
"loss": 3.9214, |
|
"step": 7562 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_accuracy": 0.3980938416422287, |
|
"eval_loss": 4.172707557678223, |
|
"eval_runtime": 0.6174, |
|
"eval_samples_per_second": 6.479, |
|
"eval_steps_per_second": 1.62, |
|
"step": 7562 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 6e-07, |
|
"loss": 3.9122, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 4.174560070037842, |
|
"eval_runtime": 0.7746, |
|
"eval_samples_per_second": 5.164, |
|
"eval_steps_per_second": 1.291, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"learning_rate": 5.979999999999999e-07, |
|
"loss": 3.9099, |
|
"step": 7638 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"eval_accuracy": 0.39956011730205276, |
|
"eval_loss": 4.169778823852539, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 7638 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"learning_rate": 5.96e-07, |
|
"loss": 3.9075, |
|
"step": 7676 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"eval_accuracy": 0.3993157380254154, |
|
"eval_loss": 4.169203758239746, |
|
"eval_runtime": 0.6199, |
|
"eval_samples_per_second": 6.452, |
|
"eval_steps_per_second": 1.613, |
|
"step": 7676 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"learning_rate": 5.939999999999999e-07, |
|
"loss": 3.9095, |
|
"step": 7714 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"eval_accuracy": 0.40004887585532745, |
|
"eval_loss": 4.16612434387207, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 7714 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"learning_rate": 5.919999999999999e-07, |
|
"loss": 3.9, |
|
"step": 7752 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 4.163661956787109, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 7752 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"learning_rate": 5.9e-07, |
|
"loss": 3.9004, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"eval_accuracy": 0.4002932551319648, |
|
"eval_loss": 4.161859512329102, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"learning_rate": 5.879999999999999e-07, |
|
"loss": 3.8978, |
|
"step": 7828 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"eval_accuracy": 0.40053763440860213, |
|
"eval_loss": 4.160345554351807, |
|
"eval_runtime": 0.6636, |
|
"eval_samples_per_second": 6.028, |
|
"eval_steps_per_second": 1.507, |
|
"step": 7828 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"learning_rate": 5.86e-07, |
|
"loss": 3.8918, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"eval_accuracy": 0.40053763440860213, |
|
"eval_loss": 4.158294677734375, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 5.839999999999999e-07, |
|
"loss": 3.8848, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 4.158019542694092, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"learning_rate": 5.819999999999999e-07, |
|
"loss": 3.8831, |
|
"step": 7942 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"eval_accuracy": 0.40004887585532745, |
|
"eval_loss": 4.1576619148254395, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 7942 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"learning_rate": 5.8e-07, |
|
"loss": 3.8821, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"eval_accuracy": 0.40053763440860213, |
|
"eval_loss": 4.154994487762451, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"learning_rate": 5.779999999999999e-07, |
|
"loss": 3.8818, |
|
"step": 8018 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 4.152185440063477, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 8018 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"learning_rate": 5.76e-07, |
|
"loss": 3.8764, |
|
"step": 8056 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 4.152061462402344, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 8056 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"learning_rate": 5.739999999999999e-07, |
|
"loss": 3.8704, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"eval_accuracy": 0.4010263929618768, |
|
"eval_loss": 4.14907693862915, |
|
"eval_runtime": 0.6221, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 1.607, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"learning_rate": 5.719999999999999e-07, |
|
"loss": 3.8725, |
|
"step": 8132 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"eval_accuracy": 0.4010263929618768, |
|
"eval_loss": 4.149218559265137, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 8132 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"learning_rate": 5.699999999999999e-07, |
|
"loss": 3.8698, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"eval_accuracy": 0.4010263929618768, |
|
"eval_loss": 4.146964073181152, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"learning_rate": 5.679999999999999e-07, |
|
"loss": 3.8654, |
|
"step": 8208 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.40175953079178883, |
|
"eval_loss": 4.146454811096191, |
|
"eval_runtime": 0.6121, |
|
"eval_samples_per_second": 6.535, |
|
"eval_steps_per_second": 1.634, |
|
"step": 8208 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"learning_rate": 5.66e-07, |
|
"loss": 3.8608, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"eval_accuracy": 0.4020039100684262, |
|
"eval_loss": 4.145140171051025, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"learning_rate": 5.639999999999999e-07, |
|
"loss": 3.8584, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"eval_accuracy": 0.4015151515151515, |
|
"eval_loss": 4.142205715179443, |
|
"eval_runtime": 0.6251, |
|
"eval_samples_per_second": 6.399, |
|
"eval_steps_per_second": 1.6, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"learning_rate": 5.620000000000001e-07, |
|
"loss": 3.8546, |
|
"step": 8322 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"eval_accuracy": 0.40249266862170086, |
|
"eval_loss": 4.1411662101745605, |
|
"eval_runtime": 0.6119, |
|
"eval_samples_per_second": 6.537, |
|
"eval_steps_per_second": 1.634, |
|
"step": 8322 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"learning_rate": 5.6e-07, |
|
"loss": 3.8494, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.4022482893450635, |
|
"eval_loss": 4.140811920166016, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"learning_rate": 5.58e-07, |
|
"loss": 3.8479, |
|
"step": 8398 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"eval_accuracy": 0.40249266862170086, |
|
"eval_loss": 4.13836145401001, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 8398 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"learning_rate": 5.560000000000001e-07, |
|
"loss": 3.8463, |
|
"step": 8436 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"eval_accuracy": 0.40249266862170086, |
|
"eval_loss": 4.136462688446045, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 8436 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"learning_rate": 5.54e-07, |
|
"loss": 3.8422, |
|
"step": 8474 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"eval_accuracy": 0.40298142717497554, |
|
"eval_loss": 4.1326165199279785, |
|
"eval_runtime": 0.6246, |
|
"eval_samples_per_second": 6.404, |
|
"eval_steps_per_second": 1.601, |
|
"step": 8474 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"learning_rate": 5.520000000000001e-07, |
|
"loss": 3.8395, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.4022482893450635, |
|
"eval_loss": 4.133283615112305, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 5.5e-07, |
|
"loss": 3.8369, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"eval_accuracy": 0.4034701857282502, |
|
"eval_loss": 4.133824825286865, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"learning_rate": 5.48e-07, |
|
"loss": 3.8357, |
|
"step": 8588 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 4.129902362823486, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 8588 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"learning_rate": 5.46e-07, |
|
"loss": 3.8318, |
|
"step": 8626 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"eval_accuracy": 0.40420332355816224, |
|
"eval_loss": 4.129788398742676, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 8626 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"learning_rate": 5.44e-07, |
|
"loss": 3.8258, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_accuracy": 0.4039589442815249, |
|
"eval_loss": 4.129807472229004, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"learning_rate": 5.420000000000001e-07, |
|
"loss": 3.8265, |
|
"step": 8702 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"eval_accuracy": 0.4044477028347996, |
|
"eval_loss": 4.127597332000732, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 8702 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"learning_rate": 5.4e-07, |
|
"loss": 3.8229, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"eval_accuracy": 0.40420332355816224, |
|
"eval_loss": 4.126589298248291, |
|
"eval_runtime": 0.6331, |
|
"eval_samples_per_second": 6.318, |
|
"eval_steps_per_second": 1.58, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"learning_rate": 5.38e-07, |
|
"loss": 3.8139, |
|
"step": 8778 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"eval_accuracy": 0.40420332355816224, |
|
"eval_loss": 4.125330448150635, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 8778 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"learning_rate": 5.36e-07, |
|
"loss": 3.8132, |
|
"step": 8816 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 4.1250810623168945, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 8816 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"learning_rate": 5.34e-07, |
|
"loss": 3.8126, |
|
"step": 8854 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 4.122879505157471, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 8854 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"learning_rate": 5.32e-07, |
|
"loss": 3.8074, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"eval_accuracy": 0.40640273704789837, |
|
"eval_loss": 4.121622085571289, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"learning_rate": 5.3e-07, |
|
"loss": 3.8072, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 4.121754169464111, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"learning_rate": 5.28e-07, |
|
"loss": 3.8056, |
|
"step": 8968 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 4.116854667663574, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 8968 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"learning_rate": 5.26e-07, |
|
"loss": 3.8038, |
|
"step": 9006 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 4.116855621337891, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 9006 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"learning_rate": 5.24e-07, |
|
"loss": 3.8025, |
|
"step": 9044 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 4.115084648132324, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9044 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"learning_rate": 5.22e-07, |
|
"loss": 3.7948, |
|
"step": 9082 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"eval_accuracy": 0.40689149560117305, |
|
"eval_loss": 4.11461877822876, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 9082 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"learning_rate": 5.2e-07, |
|
"loss": 3.7929, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 4.1119794845581055, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"learning_rate": 5.18e-07, |
|
"loss": 3.7922, |
|
"step": 9158 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"eval_accuracy": 0.40689149560117305, |
|
"eval_loss": 4.111790180206299, |
|
"eval_runtime": 0.6215, |
|
"eval_samples_per_second": 6.436, |
|
"eval_steps_per_second": 1.609, |
|
"step": 9158 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"learning_rate": 5.16e-07, |
|
"loss": 3.7897, |
|
"step": 9196 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"eval_accuracy": 0.40762463343108507, |
|
"eval_loss": 4.109217166900635, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9196 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"learning_rate": 5.14e-07, |
|
"loss": 3.7877, |
|
"step": 9234 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"eval_accuracy": 0.4078690127077224, |
|
"eval_loss": 4.107990741729736, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9234 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"learning_rate": 5.12e-07, |
|
"loss": 3.7829, |
|
"step": 9272 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_accuracy": 0.4071358748778104, |
|
"eval_loss": 4.1082682609558105, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9272 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"learning_rate": 5.1e-07, |
|
"loss": 3.7814, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"eval_accuracy": 0.40762463343108507, |
|
"eval_loss": 4.108653545379639, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"learning_rate": 5.079999999999999e-07, |
|
"loss": 3.781, |
|
"step": 9348 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"eval_accuracy": 0.4071358748778104, |
|
"eval_loss": 4.1042561531066895, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9348 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"learning_rate": 5.06e-07, |
|
"loss": 3.7728, |
|
"step": 9386 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"eval_accuracy": 0.40811339198435975, |
|
"eval_loss": 4.102220058441162, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9386 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"learning_rate": 5.04e-07, |
|
"loss": 3.779, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_accuracy": 0.40811339198435975, |
|
"eval_loss": 4.101465225219727, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.632, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"learning_rate": 5.02e-07, |
|
"loss": 3.7716, |
|
"step": 9462 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"eval_accuracy": 0.4078690127077224, |
|
"eval_loss": 4.103041172027588, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9462 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 5e-07, |
|
"loss": 3.7674, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_accuracy": 0.4078690127077224, |
|
"eval_loss": 4.099481105804443, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"learning_rate": 4.979999999999999e-07, |
|
"loss": 3.7665, |
|
"step": 9538 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.0990800857543945, |
|
"eval_runtime": 0.6218, |
|
"eval_samples_per_second": 6.433, |
|
"eval_steps_per_second": 1.608, |
|
"step": 9538 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"learning_rate": 4.96e-07, |
|
"loss": 3.7603, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_accuracy": 0.40738025415444773, |
|
"eval_loss": 4.100230693817139, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"learning_rate": 4.94e-07, |
|
"loss": 3.7645, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.095699787139893, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"learning_rate": 4.92e-07, |
|
"loss": 3.7622, |
|
"step": 9652 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"eval_accuracy": 0.4083577712609971, |
|
"eval_loss": 4.0959062576293945, |
|
"eval_runtime": 2.189, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.457, |
|
"step": 9652 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"learning_rate": 4.9e-07, |
|
"loss": 3.7583, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"eval_accuracy": 0.4083577712609971, |
|
"eval_loss": 4.0954976081848145, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"learning_rate": 4.879999999999999e-07, |
|
"loss": 3.752, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.0929741859436035, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"learning_rate": 4.86e-07, |
|
"loss": 3.7545, |
|
"step": 9766 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.0912184715271, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 9766 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"learning_rate": 4.839999999999999e-07, |
|
"loss": 3.7447, |
|
"step": 9804 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.092291831970215, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 9804 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"learning_rate": 4.82e-07, |
|
"loss": 3.7483, |
|
"step": 9842 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.089372158050537, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 9842 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"learning_rate": 4.8e-07, |
|
"loss": 3.7428, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.090963840484619, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"learning_rate": 4.779999999999999e-07, |
|
"loss": 3.7407, |
|
"step": 9918 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.087746620178223, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 9918 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"learning_rate": 4.76e-07, |
|
"loss": 3.7405, |
|
"step": 9956 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.089057922363281, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9956 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"learning_rate": 4.7399999999999993e-07, |
|
"loss": 3.7354, |
|
"step": 9994 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"eval_accuracy": 0.4088465298142718, |
|
"eval_loss": 4.0869574546813965, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 9994 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"learning_rate": 4.7199999999999994e-07, |
|
"loss": 3.7353, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_accuracy": 0.40860215053763443, |
|
"eval_loss": 4.085577487945557, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"learning_rate": 4.6999999999999995e-07, |
|
"loss": 3.7312, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.083754062652588, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"learning_rate": 4.68e-07, |
|
"loss": 3.7313, |
|
"step": 10108 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.082942485809326, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 10108 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"learning_rate": 4.66e-07, |
|
"loss": 3.7264, |
|
"step": 10146 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.0826802253723145, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 10146 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"learning_rate": 4.64e-07, |
|
"loss": 3.7221, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_accuracy": 0.40933528836754646, |
|
"eval_loss": 4.081498622894287, |
|
"eval_runtime": 0.6152, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 1.625, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"learning_rate": 4.62e-07, |
|
"loss": 3.7211, |
|
"step": 10222 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 4.0801472663879395, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 10222 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"learning_rate": 4.6e-07, |
|
"loss": 3.7232, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"eval_accuracy": 0.40933528836754646, |
|
"eval_loss": 4.0787458419799805, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 6.503, |
|
"eval_steps_per_second": 1.626, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"learning_rate": 4.58e-07, |
|
"loss": 3.718, |
|
"step": 10298 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"eval_accuracy": 0.4100684261974585, |
|
"eval_loss": 4.07801628112793, |
|
"eval_runtime": 0.6249, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 1.6, |
|
"step": 10298 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"learning_rate": 4.56e-07, |
|
"loss": 3.7208, |
|
"step": 10336 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_accuracy": 0.4108015640273705, |
|
"eval_loss": 4.077081203460693, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 10336 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"learning_rate": 4.54e-07, |
|
"loss": 3.7109, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"eval_accuracy": 0.4115347018572825, |
|
"eval_loss": 4.07664155960083, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"learning_rate": 4.5199999999999997e-07, |
|
"loss": 3.7146, |
|
"step": 10412 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"eval_accuracy": 0.41104594330400784, |
|
"eval_loss": 4.073920249938965, |
|
"eval_runtime": 0.626, |
|
"eval_samples_per_second": 6.39, |
|
"eval_steps_per_second": 1.597, |
|
"step": 10412 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 4.5e-07, |
|
"loss": 3.7071, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"eval_accuracy": 0.41177908113391987, |
|
"eval_loss": 4.073719501495361, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"learning_rate": 4.48e-07, |
|
"loss": 3.7044, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_accuracy": 0.41226783968719455, |
|
"eval_loss": 4.074197769165039, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"learning_rate": 4.46e-07, |
|
"loss": 3.7094, |
|
"step": 10526 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.071889400482178, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 10526 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"learning_rate": 4.44e-07, |
|
"loss": 3.7028, |
|
"step": 10564 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"eval_accuracy": 0.4120234604105572, |
|
"eval_loss": 4.071835994720459, |
|
"eval_runtime": 0.6231, |
|
"eval_samples_per_second": 6.419, |
|
"eval_steps_per_second": 1.605, |
|
"step": 10564 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"learning_rate": 4.4199999999999996e-07, |
|
"loss": 3.7051, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"eval_accuracy": 0.4120234604105572, |
|
"eval_loss": 4.069863319396973, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"loss": 3.7011, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.068091869354248, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"learning_rate": 4.38e-07, |
|
"loss": 3.6954, |
|
"step": 10678 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"eval_accuracy": 0.4120234604105572, |
|
"eval_loss": 4.066802501678467, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 10678 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"learning_rate": 4.36e-07, |
|
"loss": 3.6933, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"eval_accuracy": 0.41226783968719455, |
|
"eval_loss": 4.066892623901367, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"learning_rate": 4.34e-07, |
|
"loss": 3.6935, |
|
"step": 10754 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.063753128051758, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 10754 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"learning_rate": 4.3199999999999995e-07, |
|
"loss": 3.6867, |
|
"step": 10792 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.065001964569092, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 10792 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"learning_rate": 4.2999999999999996e-07, |
|
"loss": 3.6888, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"eval_accuracy": 0.4120234604105572, |
|
"eval_loss": 4.0640668869018555, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"learning_rate": 4.2799999999999997e-07, |
|
"loss": 3.6843, |
|
"step": 10868 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"eval_accuracy": 0.4115347018572825, |
|
"eval_loss": 4.0637993812561035, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 10868 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"learning_rate": 4.26e-07, |
|
"loss": 3.6824, |
|
"step": 10906 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.06214714050293, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 10906 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"learning_rate": 4.24e-07, |
|
"loss": 3.6821, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_accuracy": 0.41226783968719455, |
|
"eval_loss": 4.060315132141113, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"learning_rate": 4.2199999999999994e-07, |
|
"loss": 3.6802, |
|
"step": 10982 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"eval_accuracy": 0.4125122189638319, |
|
"eval_loss": 4.062171459197998, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 10982 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"loss": 3.6789, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"eval_accuracy": 0.41275659824046923, |
|
"eval_loss": 4.057875633239746, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"learning_rate": 4.1799999999999996e-07, |
|
"loss": 3.6767, |
|
"step": 11058 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"eval_accuracy": 0.41300097751710657, |
|
"eval_loss": 4.057925701141357, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.632, |
|
"step": 11058 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"learning_rate": 4.1599999999999997e-07, |
|
"loss": 3.6751, |
|
"step": 11096 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_accuracy": 0.4137341153470186, |
|
"eval_loss": 4.058208465576172, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 6.478, |
|
"eval_steps_per_second": 1.62, |
|
"step": 11096 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"learning_rate": 4.14e-07, |
|
"loss": 3.6726, |
|
"step": 11134 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"eval_accuracy": 0.4137341153470186, |
|
"eval_loss": 4.055559158325195, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11134 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"learning_rate": 4.12e-07, |
|
"loss": 3.6704, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"eval_accuracy": 0.4137341153470186, |
|
"eval_loss": 4.058291435241699, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"learning_rate": 4.0999999999999994e-07, |
|
"loss": 3.6703, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"eval_accuracy": 0.4142228739002933, |
|
"eval_loss": 4.055552005767822, |
|
"eval_runtime": 0.626, |
|
"eval_samples_per_second": 6.39, |
|
"eval_steps_per_second": 1.598, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"learning_rate": 4.0799999999999995e-07, |
|
"loss": 3.6662, |
|
"step": 11248 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.05183219909668, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 11248 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"learning_rate": 4.06e-07, |
|
"loss": 3.6643, |
|
"step": 11286 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.05209493637085, |
|
"eval_runtime": 0.626, |
|
"eval_samples_per_second": 6.39, |
|
"eval_steps_per_second": 1.597, |
|
"step": 11286 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"learning_rate": 4.04e-07, |
|
"loss": 3.6623, |
|
"step": 11324 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"eval_accuracy": 0.4144672531769306, |
|
"eval_loss": 4.054409980773926, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11324 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"learning_rate": 4.02e-07, |
|
"loss": 3.6626, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.051777362823486, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 4e-07, |
|
"loss": 3.661, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.049643516540527, |
|
"eval_runtime": 0.7928, |
|
"eval_samples_per_second": 5.046, |
|
"eval_steps_per_second": 1.261, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"learning_rate": 3.98e-07, |
|
"loss": 3.6553, |
|
"step": 11438 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"eval_accuracy": 0.4149560117302053, |
|
"eval_loss": 4.048153400421143, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 6.533, |
|
"eval_steps_per_second": 1.633, |
|
"step": 11438 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"learning_rate": 3.96e-07, |
|
"loss": 3.6573, |
|
"step": 11476 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.047247886657715, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11476 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"learning_rate": 3.94e-07, |
|
"loss": 3.6548, |
|
"step": 11514 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"eval_accuracy": 0.41520039100684264, |
|
"eval_loss": 4.046008586883545, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 11514 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"learning_rate": 3.92e-07, |
|
"loss": 3.6531, |
|
"step": 11552 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.046994209289551, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11552 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"learning_rate": 3.8999999999999997e-07, |
|
"loss": 3.6549, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"eval_accuracy": 0.4149560117302053, |
|
"eval_loss": 4.046128273010254, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"learning_rate": 3.88e-07, |
|
"loss": 3.6485, |
|
"step": 11628 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"eval_accuracy": 0.41471163245356796, |
|
"eval_loss": 4.0460734367370605, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11628 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"learning_rate": 3.86e-07, |
|
"loss": 3.6441, |
|
"step": 11666 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"eval_accuracy": 0.4149560117302053, |
|
"eval_loss": 4.046470642089844, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 11666 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"learning_rate": 3.84e-07, |
|
"loss": 3.6438, |
|
"step": 11704 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_accuracy": 0.41593352883675466, |
|
"eval_loss": 4.042454719543457, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 11704 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"learning_rate": 3.82e-07, |
|
"loss": 3.6435, |
|
"step": 11742 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"eval_accuracy": 0.4156891495601173, |
|
"eval_loss": 4.040951251983643, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 11742 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"loss": 3.6397, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"eval_accuracy": 0.41593352883675466, |
|
"eval_loss": 4.040650844573975, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"learning_rate": 3.7799999999999997e-07, |
|
"loss": 3.6363, |
|
"step": 11818 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"eval_accuracy": 0.41544477028348, |
|
"eval_loss": 4.042422294616699, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 11818 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"learning_rate": 3.76e-07, |
|
"loss": 3.6315, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_accuracy": 0.41544477028348, |
|
"eval_loss": 4.043632984161377, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"learning_rate": 3.74e-07, |
|
"loss": 3.6323, |
|
"step": 11894 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"eval_accuracy": 0.4156891495601173, |
|
"eval_loss": 4.040919303894043, |
|
"eval_runtime": 0.628, |
|
"eval_samples_per_second": 6.369, |
|
"eval_steps_per_second": 1.592, |
|
"step": 11894 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"learning_rate": 3.72e-07, |
|
"loss": 3.6386, |
|
"step": 11932 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"eval_accuracy": 0.4156891495601173, |
|
"eval_loss": 4.038565158843994, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 11932 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"learning_rate": 3.7e-07, |
|
"loss": 3.6303, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"eval_accuracy": 0.41544477028348, |
|
"eval_loss": 4.0388689041137695, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"learning_rate": 3.6799999999999996e-07, |
|
"loss": 3.6336, |
|
"step": 12008 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_accuracy": 0.41642228739002934, |
|
"eval_loss": 4.039405345916748, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12008 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"learning_rate": 3.6599999999999997e-07, |
|
"loss": 3.6281, |
|
"step": 12046 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"eval_accuracy": 0.4166666666666667, |
|
"eval_loss": 4.038857460021973, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 12046 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"learning_rate": 3.64e-07, |
|
"loss": 3.6249, |
|
"step": 12084 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"eval_accuracy": 0.41764418377321605, |
|
"eval_loss": 4.037881374359131, |
|
"eval_runtime": 0.7782, |
|
"eval_samples_per_second": 5.14, |
|
"eval_steps_per_second": 1.285, |
|
"step": 12084 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"learning_rate": 3.62e-07, |
|
"loss": 3.6277, |
|
"step": 12122 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"eval_accuracy": 0.41764418377321605, |
|
"eval_loss": 4.037135601043701, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 12122 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"learning_rate": 3.6e-07, |
|
"loss": 3.6232, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_accuracy": 0.41715542521994137, |
|
"eval_loss": 4.035280704498291, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"learning_rate": 3.5799999999999995e-07, |
|
"loss": 3.6177, |
|
"step": 12198 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"eval_accuracy": 0.41764418377321605, |
|
"eval_loss": 4.036287307739258, |
|
"eval_runtime": 0.6153, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 1.625, |
|
"step": 12198 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"learning_rate": 3.5599999999999996e-07, |
|
"loss": 3.626, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"eval_accuracy": 0.4173998044965787, |
|
"eval_loss": 4.031866073608398, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"learning_rate": 3.5399999999999997e-07, |
|
"loss": 3.6181, |
|
"step": 12274 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"eval_accuracy": 0.41715542521994137, |
|
"eval_loss": 4.031935691833496, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 12274 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"learning_rate": 3.52e-07, |
|
"loss": 3.6183, |
|
"step": 12312 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_accuracy": 0.41764418377321605, |
|
"eval_loss": 4.03291130065918, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 12312 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 3.5e-07, |
|
"loss": 3.6169, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"eval_accuracy": 0.41764418377321605, |
|
"eval_loss": 4.032841682434082, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"learning_rate": 3.4799999999999994e-07, |
|
"loss": 3.6094, |
|
"step": 12388 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"eval_accuracy": 0.4178885630498534, |
|
"eval_loss": 4.031832218170166, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12388 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"learning_rate": 3.4599999999999995e-07, |
|
"loss": 3.6138, |
|
"step": 12426 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"eval_accuracy": 0.4178885630498534, |
|
"eval_loss": 4.029395580291748, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 12426 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"learning_rate": 3.4399999999999996e-07, |
|
"loss": 3.6101, |
|
"step": 12464 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_accuracy": 0.41813294232649073, |
|
"eval_loss": 4.031092166900635, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 12464 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"learning_rate": 3.42e-07, |
|
"loss": 3.6062, |
|
"step": 12502 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"eval_accuracy": 0.41837732160312807, |
|
"eval_loss": 4.029919624328613, |
|
"eval_runtime": 0.6173, |
|
"eval_samples_per_second": 6.48, |
|
"eval_steps_per_second": 1.62, |
|
"step": 12502 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 3.6093, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"eval_accuracy": 0.41813294232649073, |
|
"eval_loss": 4.027568817138672, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"learning_rate": 3.38e-07, |
|
"loss": 3.6071, |
|
"step": 12578 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"eval_accuracy": 0.41813294232649073, |
|
"eval_loss": 4.030076503753662, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 12578 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"learning_rate": 3.36e-07, |
|
"loss": 3.6064, |
|
"step": 12616 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_accuracy": 0.41837732160312807, |
|
"eval_loss": 4.027680397033691, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 1.631, |
|
"step": 12616 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"learning_rate": 3.34e-07, |
|
"loss": 3.5982, |
|
"step": 12654 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"eval_accuracy": 0.41837732160312807, |
|
"eval_loss": 4.028773784637451, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 12654 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"learning_rate": 3.32e-07, |
|
"loss": 3.6064, |
|
"step": 12692 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"eval_accuracy": 0.4178885630498534, |
|
"eval_loss": 4.0255818367004395, |
|
"eval_runtime": 0.6242, |
|
"eval_samples_per_second": 6.408, |
|
"eval_steps_per_second": 1.602, |
|
"step": 12692 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"learning_rate": 3.3e-07, |
|
"loss": 3.6023, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"eval_accuracy": 0.41837732160312807, |
|
"eval_loss": 4.025238037109375, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"learning_rate": 3.28e-07, |
|
"loss": 3.5992, |
|
"step": 12768 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_accuracy": 0.4186217008797654, |
|
"eval_loss": 4.024014472961426, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.632, |
|
"step": 12768 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"learning_rate": 3.26e-07, |
|
"loss": 3.5997, |
|
"step": 12806 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"eval_accuracy": 0.41886608015640275, |
|
"eval_loss": 4.0236945152282715, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12806 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"learning_rate": 3.24e-07, |
|
"loss": 3.5955, |
|
"step": 12844 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"eval_accuracy": 0.4186217008797654, |
|
"eval_loss": 4.02353048324585, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12844 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"learning_rate": 3.22e-07, |
|
"loss": 3.5929, |
|
"step": 12882 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"eval_accuracy": 0.4186217008797654, |
|
"eval_loss": 4.023321151733398, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 12882 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"learning_rate": 3.2e-07, |
|
"loss": 3.5953, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_accuracy": 0.41886608015640275, |
|
"eval_loss": 4.020965099334717, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 1.632, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"learning_rate": 3.18e-07, |
|
"loss": 3.5915, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"eval_accuracy": 0.41837732160312807, |
|
"eval_loss": 4.020979404449463, |
|
"eval_runtime": 0.6256, |
|
"eval_samples_per_second": 6.394, |
|
"eval_steps_per_second": 1.598, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"learning_rate": 3.1599999999999997e-07, |
|
"loss": 3.5835, |
|
"step": 12996 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"eval_accuracy": 0.41886608015640275, |
|
"eval_loss": 4.022586345672607, |
|
"eval_runtime": 0.6251, |
|
"eval_samples_per_second": 6.399, |
|
"eval_steps_per_second": 1.6, |
|
"step": 12996 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"learning_rate": 3.14e-07, |
|
"loss": 3.5852, |
|
"step": 13034 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"eval_accuracy": 0.41886608015640275, |
|
"eval_loss": 4.022684574127197, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13034 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"learning_rate": 3.12e-07, |
|
"loss": 3.5894, |
|
"step": 13072 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.022200584411621, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 13072 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"learning_rate": 3.1e-07, |
|
"loss": 3.5864, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.022695541381836, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"learning_rate": 3.08e-07, |
|
"loss": 3.5854, |
|
"step": 13148 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.018957138061523, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13148 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"learning_rate": 3.0599999999999996e-07, |
|
"loss": 3.5841, |
|
"step": 13186 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.017984390258789, |
|
"eval_runtime": 0.6155, |
|
"eval_samples_per_second": 6.499, |
|
"eval_steps_per_second": 1.625, |
|
"step": 13186 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"learning_rate": 3.0399999999999997e-07, |
|
"loss": 3.5821, |
|
"step": 13224 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.018927097320557, |
|
"eval_runtime": 0.6152, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 1.626, |
|
"step": 13224 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"learning_rate": 3.02e-07, |
|
"loss": 3.5823, |
|
"step": 13262 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.0175862312316895, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13262 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"learning_rate": 3e-07, |
|
"loss": 3.5772, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.016434669494629, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"learning_rate": 2.98e-07, |
|
"loss": 3.5827, |
|
"step": 13338 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"eval_accuracy": 0.4186217008797654, |
|
"eval_loss": 4.014683723449707, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 13338 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"learning_rate": 2.9599999999999995e-07, |
|
"loss": 3.5747, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.014786720275879, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"learning_rate": 2.9399999999999996e-07, |
|
"loss": 3.5745, |
|
"step": 13414 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.016923904418945, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13414 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"learning_rate": 2.9199999999999997e-07, |
|
"loss": 3.576, |
|
"step": 13452 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.0161919593811035, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13452 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"learning_rate": 2.9e-07, |
|
"loss": 3.5723, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.012264728546143, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"learning_rate": 2.88e-07, |
|
"loss": 3.5669, |
|
"step": 13528 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.014427185058594, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13528 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"learning_rate": 2.8599999999999994e-07, |
|
"loss": 3.5721, |
|
"step": 13566 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"eval_accuracy": 0.41886608015640275, |
|
"eval_loss": 4.0136189460754395, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 13566 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"learning_rate": 2.8399999999999995e-07, |
|
"loss": 3.5725, |
|
"step": 13604 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.01244592666626, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 13604 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"learning_rate": 2.8199999999999996e-07, |
|
"loss": 3.5627, |
|
"step": 13642 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.012938976287842, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 13642 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"learning_rate": 2.8e-07, |
|
"loss": 3.5632, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.012718677520752, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.632, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"learning_rate": 2.7800000000000003e-07, |
|
"loss": 3.5641, |
|
"step": 13718 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.01040506362915, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 13718 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"learning_rate": 2.7600000000000004e-07, |
|
"loss": 3.5636, |
|
"step": 13756 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.010016918182373, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 13756 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"learning_rate": 2.74e-07, |
|
"loss": 3.5566, |
|
"step": 13794 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.01265811920166, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13794 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"learning_rate": 2.72e-07, |
|
"loss": 3.5556, |
|
"step": 13832 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_accuracy": 0.4198435972629521, |
|
"eval_loss": 4.013090133666992, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 13832 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"learning_rate": 2.7e-07, |
|
"loss": 3.5606, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.01081657409668, |
|
"eval_runtime": 0.6265, |
|
"eval_samples_per_second": 6.385, |
|
"eval_steps_per_second": 1.596, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"learning_rate": 2.68e-07, |
|
"loss": 3.5573, |
|
"step": 13908 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.009543418884277, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 13908 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"learning_rate": 2.66e-07, |
|
"loss": 3.5603, |
|
"step": 13946 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.007948875427246, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 13946 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"learning_rate": 2.64e-07, |
|
"loss": 3.5552, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_accuracy": 0.4191104594330401, |
|
"eval_loss": 4.007278919219971, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"learning_rate": 2.62e-07, |
|
"loss": 3.5594, |
|
"step": 14022 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.007977485656738, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 14022 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"learning_rate": 2.6e-07, |
|
"loss": 3.5557, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.006712913513184, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"learning_rate": 2.58e-07, |
|
"loss": 3.5523, |
|
"step": 14098 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.006473541259766, |
|
"eval_runtime": 0.615, |
|
"eval_samples_per_second": 6.504, |
|
"eval_steps_per_second": 1.626, |
|
"step": 14098 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"learning_rate": 2.56e-07, |
|
"loss": 3.5516, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.007019519805908, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"learning_rate": 2.5399999999999997e-07, |
|
"loss": 3.5466, |
|
"step": 14174 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.007321834564209, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 14174 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"learning_rate": 2.52e-07, |
|
"loss": 3.5474, |
|
"step": 14212 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.004045486450195, |
|
"eval_runtime": 0.6268, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 1.595, |
|
"step": 14212 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"learning_rate": 2.5e-07, |
|
"loss": 3.5481, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"eval_accuracy": 0.4195992179863148, |
|
"eval_loss": 4.003184795379639, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"learning_rate": 2.48e-07, |
|
"loss": 3.5496, |
|
"step": 14288 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.00510311126709, |
|
"eval_runtime": 0.6153, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 1.625, |
|
"step": 14288 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"learning_rate": 2.46e-07, |
|
"loss": 3.5489, |
|
"step": 14326 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"eval_accuracy": 0.41935483870967744, |
|
"eval_loss": 4.003530502319336, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 14326 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"learning_rate": 2.4399999999999996e-07, |
|
"loss": 3.5439, |
|
"step": 14364 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"eval_accuracy": 0.4198435972629521, |
|
"eval_loss": 4.0032219886779785, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 14364 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"learning_rate": 2.4199999999999997e-07, |
|
"loss": 3.5464, |
|
"step": 14402 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"eval_accuracy": 0.42057673509286414, |
|
"eval_loss": 4.002893924713135, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 14402 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"learning_rate": 2.4e-07, |
|
"loss": 3.5455, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_accuracy": 0.4198435972629521, |
|
"eval_loss": 4.003747463226318, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"learning_rate": 2.38e-07, |
|
"loss": 3.5439, |
|
"step": 14478 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"eval_accuracy": 0.42057673509286414, |
|
"eval_loss": 4.002392292022705, |
|
"eval_runtime": 0.6152, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 1.626, |
|
"step": 14478 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"learning_rate": 2.3599999999999997e-07, |
|
"loss": 3.542, |
|
"step": 14516 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"eval_accuracy": 0.4203323558162268, |
|
"eval_loss": 4.001096725463867, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.632, |
|
"step": 14516 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"learning_rate": 2.34e-07, |
|
"loss": 3.5366, |
|
"step": 14554 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"eval_accuracy": 0.4203323558162268, |
|
"eval_loss": 4.001129150390625, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 14554 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"learning_rate": 2.32e-07, |
|
"loss": 3.5368, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_accuracy": 0.42057673509286414, |
|
"eval_loss": 4.001524448394775, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"learning_rate": 2.3e-07, |
|
"loss": 3.5382, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 4.0017523765563965, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 6.503, |
|
"eval_steps_per_second": 1.626, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"learning_rate": 2.28e-07, |
|
"loss": 3.5358, |
|
"step": 14668 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"eval_accuracy": 0.42008797653958946, |
|
"eval_loss": 4.000154495239258, |
|
"eval_runtime": 0.6297, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 1.588, |
|
"step": 14668 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"learning_rate": 2.2599999999999999e-07, |
|
"loss": 3.5324, |
|
"step": 14706 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"eval_accuracy": 0.4198435972629521, |
|
"eval_loss": 3.9989571571350098, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 14706 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"learning_rate": 2.24e-07, |
|
"loss": 3.5378, |
|
"step": 14744 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_accuracy": 0.42057673509286414, |
|
"eval_loss": 4.000235080718994, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 14744 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"learning_rate": 2.22e-07, |
|
"loss": 3.5334, |
|
"step": 14782 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"eval_accuracy": 0.4208211143695015, |
|
"eval_loss": 3.9985251426696777, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 14782 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"learning_rate": 2.1999999999999998e-07, |
|
"loss": 3.5349, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.998689651489258, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"learning_rate": 2.18e-07, |
|
"loss": 3.5378, |
|
"step": 14858 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.9983861446380615, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 14858 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"learning_rate": 2.1599999999999998e-07, |
|
"loss": 3.5304, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_accuracy": 0.42057673509286414, |
|
"eval_loss": 3.9976606369018555, |
|
"eval_runtime": 0.6239, |
|
"eval_samples_per_second": 6.411, |
|
"eval_steps_per_second": 1.603, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"learning_rate": 2.1399999999999998e-07, |
|
"loss": 3.5241, |
|
"step": 14934 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9984891414642334, |
|
"eval_runtime": 0.6127, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 14934 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"learning_rate": 2.12e-07, |
|
"loss": 3.527, |
|
"step": 14972 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.9997339248657227, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 14972 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"learning_rate": 2.0999999999999997e-07, |
|
"loss": 3.5261, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.9985299110412598, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"learning_rate": 2.0799999999999998e-07, |
|
"loss": 3.5233, |
|
"step": 15048 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.9982762336730957, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 15048 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"learning_rate": 2.06e-07, |
|
"loss": 3.5279, |
|
"step": 15086 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9965884685516357, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15086 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"learning_rate": 2.0399999999999997e-07, |
|
"loss": 3.5276, |
|
"step": 15124 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.995763063430786, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15124 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"learning_rate": 2.02e-07, |
|
"loss": 3.5214, |
|
"step": 15162 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9957404136657715, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 15162 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"learning_rate": 2e-07, |
|
"loss": 3.5222, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.995762586593628, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"learning_rate": 1.98e-07, |
|
"loss": 3.5163, |
|
"step": 15238 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9957165718078613, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 15238 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"learning_rate": 1.96e-07, |
|
"loss": 3.5208, |
|
"step": 15276 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.995258092880249, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15276 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"learning_rate": 1.94e-07, |
|
"loss": 3.5168, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.994943380355835, |
|
"eval_runtime": 0.616, |
|
"eval_samples_per_second": 6.494, |
|
"eval_steps_per_second": 1.623, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"learning_rate": 1.92e-07, |
|
"loss": 3.5242, |
|
"step": 15352 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.994105577468872, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15352 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"learning_rate": 1.8999999999999998e-07, |
|
"loss": 3.5205, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.993699789047241, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"learning_rate": 1.88e-07, |
|
"loss": 3.5158, |
|
"step": 15428 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9949395656585693, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 15428 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"learning_rate": 1.86e-07, |
|
"loss": 3.517, |
|
"step": 15466 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9939072132110596, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 15466 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"learning_rate": 1.8399999999999998e-07, |
|
"loss": 3.519, |
|
"step": 15504 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.9944329261779785, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15504 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"learning_rate": 1.82e-07, |
|
"loss": 3.5164, |
|
"step": 15542 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9929213523864746, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 6.478, |
|
"eval_steps_per_second": 1.619, |
|
"step": 15542 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"learning_rate": 1.8e-07, |
|
"loss": 3.5133, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.9925248622894287, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"learning_rate": 1.7799999999999998e-07, |
|
"loss": 3.5199, |
|
"step": 15618 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"eval_accuracy": 0.4210654936461388, |
|
"eval_loss": 3.9905953407287598, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 15618 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"learning_rate": 1.76e-07, |
|
"loss": 3.5117, |
|
"step": 15656 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.9919614791870117, |
|
"eval_runtime": 0.6189, |
|
"eval_samples_per_second": 6.463, |
|
"eval_steps_per_second": 1.616, |
|
"step": 15656 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"learning_rate": 1.7399999999999997e-07, |
|
"loss": 3.5151, |
|
"step": 15694 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9906229972839355, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 15694 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"learning_rate": 1.7199999999999998e-07, |
|
"loss": 3.5093, |
|
"step": 15732 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9914052486419678, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15732 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"learning_rate": 1.7000000000000001e-07, |
|
"loss": 3.512, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.9908926486968994, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"learning_rate": 1.68e-07, |
|
"loss": 3.5076, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9911580085754395, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"learning_rate": 1.66e-07, |
|
"loss": 3.5059, |
|
"step": 15846 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"eval_accuracy": 0.4220430107526882, |
|
"eval_loss": 3.9916296005249023, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 15846 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"learning_rate": 1.64e-07, |
|
"loss": 3.5096, |
|
"step": 15884 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.990671396255493, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15884 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"learning_rate": 1.62e-07, |
|
"loss": 3.5038, |
|
"step": 15922 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9902234077453613, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15922 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"learning_rate": 1.6e-07, |
|
"loss": 3.5089, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_accuracy": 0.4215542521994135, |
|
"eval_loss": 3.989504814147949, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"learning_rate": 1.5799999999999999e-07, |
|
"loss": 3.5091, |
|
"step": 15998 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"eval_accuracy": 0.42130987292277616, |
|
"eval_loss": 3.9893267154693604, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 15998 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"learning_rate": 1.56e-07, |
|
"loss": 3.5101, |
|
"step": 16036 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9890270233154297, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 16036 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"learning_rate": 1.54e-07, |
|
"loss": 3.5061, |
|
"step": 16074 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"eval_accuracy": 0.4220430107526882, |
|
"eval_loss": 3.990032434463501, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 16074 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"learning_rate": 1.5199999999999998e-07, |
|
"loss": 3.5048, |
|
"step": 16112 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9888319969177246, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 16112 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"learning_rate": 1.5e-07, |
|
"loss": 3.501, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.9880638122558594, |
|
"eval_runtime": 0.6256, |
|
"eval_samples_per_second": 6.394, |
|
"eval_steps_per_second": 1.599, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"learning_rate": 1.4799999999999998e-07, |
|
"loss": 3.5067, |
|
"step": 16188 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"eval_accuracy": 0.42179863147605084, |
|
"eval_loss": 3.987746000289917, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 16188 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"learning_rate": 1.4599999999999998e-07, |
|
"loss": 3.5037, |
|
"step": 16226 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"eval_accuracy": 0.4222873900293255, |
|
"eval_loss": 3.986624002456665, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 16226 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"learning_rate": 1.44e-07, |
|
"loss": 3.5052, |
|
"step": 16264 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_accuracy": 0.4222873900293255, |
|
"eval_loss": 3.985456943511963, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 1.631, |
|
"step": 16264 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"learning_rate": 1.4199999999999997e-07, |
|
"loss": 3.5049, |
|
"step": 16302 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"eval_accuracy": 0.4222873900293255, |
|
"eval_loss": 3.9861788749694824, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16302 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"learning_rate": 1.4e-07, |
|
"loss": 3.5017, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"eval_accuracy": 0.4227761485826002, |
|
"eval_loss": 3.987318992614746, |
|
"eval_runtime": 0.6129, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.632, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"learning_rate": 1.3800000000000002e-07, |
|
"loss": 3.5038, |
|
"step": 16378 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"eval_accuracy": 0.4227761485826002, |
|
"eval_loss": 3.9872233867645264, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 16378 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"learning_rate": 1.36e-07, |
|
"loss": 3.5072, |
|
"step": 16416 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_accuracy": 0.42253176930596287, |
|
"eval_loss": 3.985309362411499, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16416 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"learning_rate": 1.34e-07, |
|
"loss": 3.5009, |
|
"step": 16454 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"eval_accuracy": 0.42253176930596287, |
|
"eval_loss": 3.9849016666412354, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.628, |
|
"step": 16454 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"learning_rate": 1.32e-07, |
|
"loss": 3.5023, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"eval_accuracy": 0.4227761485826002, |
|
"eval_loss": 3.9856038093566895, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"learning_rate": 1.3e-07, |
|
"loss": 3.4982, |
|
"step": 16530 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"eval_accuracy": 0.4227761485826002, |
|
"eval_loss": 3.9859957695007324, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16530 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"learning_rate": 1.28e-07, |
|
"loss": 3.4927, |
|
"step": 16568 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.9858930110931396, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.627, |
|
"step": 16568 |
|
}, |
|
{ |
|
"epoch": 437.0, |
|
"learning_rate": 1.26e-07, |
|
"loss": 3.4959, |
|
"step": 16606 |
|
}, |
|
{ |
|
"epoch": 437.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.986088514328003, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 16606 |
|
}, |
|
{ |
|
"epoch": 438.0, |
|
"learning_rate": 1.24e-07, |
|
"loss": 3.4984, |
|
"step": 16644 |
|
}, |
|
{ |
|
"epoch": 438.0, |
|
"eval_accuracy": 0.4227761485826002, |
|
"eval_loss": 3.9860339164733887, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 16644 |
|
}, |
|
{ |
|
"epoch": 439.0, |
|
"learning_rate": 1.2199999999999998e-07, |
|
"loss": 3.5005, |
|
"step": 16682 |
|
}, |
|
{ |
|
"epoch": 439.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.9846749305725098, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 16682 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"learning_rate": 1.2e-07, |
|
"loss": 3.4947, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.9845149517059326, |
|
"eval_runtime": 0.6202, |
|
"eval_samples_per_second": 6.45, |
|
"eval_steps_per_second": 1.612, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 441.0, |
|
"learning_rate": 1.1799999999999998e-07, |
|
"loss": 3.4964, |
|
"step": 16758 |
|
}, |
|
{ |
|
"epoch": 441.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.9842681884765625, |
|
"eval_runtime": 0.6165, |
|
"eval_samples_per_second": 6.488, |
|
"eval_steps_per_second": 1.622, |
|
"step": 16758 |
|
}, |
|
{ |
|
"epoch": 442.0, |
|
"learning_rate": 1.16e-07, |
|
"loss": 3.4955, |
|
"step": 16796 |
|
}, |
|
{ |
|
"epoch": 442.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9844443798065186, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16796 |
|
}, |
|
{ |
|
"epoch": 443.0, |
|
"learning_rate": 1.14e-07, |
|
"loss": 3.4923, |
|
"step": 16834 |
|
}, |
|
{ |
|
"epoch": 443.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9843380451202393, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 16834 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"learning_rate": 1.12e-07, |
|
"loss": 3.4993, |
|
"step": 16872 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.9841716289520264, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16872 |
|
}, |
|
{ |
|
"epoch": 445.0, |
|
"learning_rate": 1.0999999999999999e-07, |
|
"loss": 3.4889, |
|
"step": 16910 |
|
}, |
|
{ |
|
"epoch": 445.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9846384525299072, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 16910 |
|
}, |
|
{ |
|
"epoch": 446.0, |
|
"learning_rate": 1.0799999999999999e-07, |
|
"loss": 3.487, |
|
"step": 16948 |
|
}, |
|
{ |
|
"epoch": 446.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.98549485206604, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 16948 |
|
}, |
|
{ |
|
"epoch": 447.0, |
|
"learning_rate": 1.06e-07, |
|
"loss": 3.4965, |
|
"step": 16986 |
|
}, |
|
{ |
|
"epoch": 447.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.985051155090332, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 16986 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"learning_rate": 1.0399999999999999e-07, |
|
"loss": 3.4873, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9851512908935547, |
|
"eval_runtime": 0.6232, |
|
"eval_samples_per_second": 6.419, |
|
"eval_steps_per_second": 1.605, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 449.0, |
|
"learning_rate": 1.0199999999999999e-07, |
|
"loss": 3.4936, |
|
"step": 17062 |
|
}, |
|
{ |
|
"epoch": 449.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.984747886657715, |
|
"eval_runtime": 0.6132, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.631, |
|
"step": 17062 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"learning_rate": 1e-07, |
|
"loss": 3.494, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.984121084213257, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 451.0, |
|
"learning_rate": 9.8e-08, |
|
"loss": 3.4855, |
|
"step": 17138 |
|
}, |
|
{ |
|
"epoch": 451.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9835801124572754, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17138 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"learning_rate": 9.6e-08, |
|
"loss": 3.4898, |
|
"step": 17176 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_accuracy": 0.42302052785923755, |
|
"eval_loss": 3.983008623123169, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17176 |
|
}, |
|
{ |
|
"epoch": 453.0, |
|
"learning_rate": 9.4e-08, |
|
"loss": 3.4866, |
|
"step": 17214 |
|
}, |
|
{ |
|
"epoch": 453.0, |
|
"eval_accuracy": 0.4232649071358749, |
|
"eval_loss": 3.9831044673919678, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17214 |
|
}, |
|
{ |
|
"epoch": 454.0, |
|
"learning_rate": 9.199999999999999e-08, |
|
"loss": 3.4866, |
|
"step": 17252 |
|
}, |
|
{ |
|
"epoch": 454.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.983053207397461, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 17252 |
|
}, |
|
{ |
|
"epoch": 455.0, |
|
"learning_rate": 9e-08, |
|
"loss": 3.4886, |
|
"step": 17290 |
|
}, |
|
{ |
|
"epoch": 455.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9836299419403076, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17290 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"learning_rate": 8.8e-08, |
|
"loss": 3.4874, |
|
"step": 17328 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.983760118484497, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 17328 |
|
}, |
|
{ |
|
"epoch": 457.0, |
|
"learning_rate": 8.599999999999999e-08, |
|
"loss": 3.486, |
|
"step": 17366 |
|
}, |
|
{ |
|
"epoch": 457.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.983823776245117, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17366 |
|
}, |
|
{ |
|
"epoch": 458.0, |
|
"learning_rate": 8.4e-08, |
|
"loss": 3.4869, |
|
"step": 17404 |
|
}, |
|
{ |
|
"epoch": 458.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.983541488647461, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17404 |
|
}, |
|
{ |
|
"epoch": 459.0, |
|
"learning_rate": 8.2e-08, |
|
"loss": 3.4845, |
|
"step": 17442 |
|
}, |
|
{ |
|
"epoch": 459.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9833405017852783, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 1.627, |
|
"step": 17442 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"learning_rate": 8e-08, |
|
"loss": 3.4849, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9825379848480225, |
|
"eval_runtime": 0.6139, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 461.0, |
|
"learning_rate": 7.8e-08, |
|
"loss": 3.4841, |
|
"step": 17518 |
|
}, |
|
{ |
|
"epoch": 461.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.981783628463745, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 1.626, |
|
"step": 17518 |
|
}, |
|
{ |
|
"epoch": 462.0, |
|
"learning_rate": 7.599999999999999e-08, |
|
"loss": 3.4924, |
|
"step": 17556 |
|
}, |
|
{ |
|
"epoch": 462.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.9813952445983887, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.512, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17556 |
|
}, |
|
{ |
|
"epoch": 463.0, |
|
"learning_rate": 7.399999999999999e-08, |
|
"loss": 3.571, |
|
"step": 17594 |
|
}, |
|
{ |
|
"epoch": 463.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.981501579284668, |
|
"eval_runtime": 0.6142, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17594 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"learning_rate": 7.2e-08, |
|
"loss": 3.4811, |
|
"step": 17632 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.981280565261841, |
|
"eval_runtime": 0.6153, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 1.625, |
|
"step": 17632 |
|
}, |
|
{ |
|
"epoch": 465.0, |
|
"learning_rate": 7e-08, |
|
"loss": 3.4851, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 465.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.9809834957122803, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 466.0, |
|
"learning_rate": 6.8e-08, |
|
"loss": 3.4776, |
|
"step": 17708 |
|
}, |
|
{ |
|
"epoch": 466.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9812755584716797, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 17708 |
|
}, |
|
{ |
|
"epoch": 467.0, |
|
"learning_rate": 6.6e-08, |
|
"loss": 3.4849, |
|
"step": 17746 |
|
}, |
|
{ |
|
"epoch": 467.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.981030225753784, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 1.629, |
|
"step": 17746 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"learning_rate": 6.4e-08, |
|
"loss": 3.4766, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9813342094421387, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 469.0, |
|
"learning_rate": 6.2e-08, |
|
"loss": 3.4791, |
|
"step": 17822 |
|
}, |
|
{ |
|
"epoch": 469.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.981501817703247, |
|
"eval_runtime": 0.6135, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17822 |
|
}, |
|
{ |
|
"epoch": 470.0, |
|
"learning_rate": 6e-08, |
|
"loss": 3.4814, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 470.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.98130464553833, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.627, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 471.0, |
|
"learning_rate": 5.8e-08, |
|
"loss": 3.4861, |
|
"step": 17898 |
|
}, |
|
{ |
|
"epoch": 471.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.980907917022705, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 17898 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"learning_rate": 5.6e-08, |
|
"loss": 3.4861, |
|
"step": 17936 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.9806013107299805, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.631, |
|
"step": 17936 |
|
}, |
|
{ |
|
"epoch": 473.0, |
|
"learning_rate": 5.3999999999999994e-08, |
|
"loss": 3.4825, |
|
"step": 17974 |
|
}, |
|
{ |
|
"epoch": 473.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.9808974266052246, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 17974 |
|
}, |
|
{ |
|
"epoch": 474.0, |
|
"learning_rate": 5.1999999999999996e-08, |
|
"loss": 3.4758, |
|
"step": 18012 |
|
}, |
|
{ |
|
"epoch": 474.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.98111629486084, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 18012 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"learning_rate": 5e-08, |
|
"loss": 3.4811, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"eval_accuracy": 0.42350928641251223, |
|
"eval_loss": 3.980703592300415, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.629, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"learning_rate": 4.8e-08, |
|
"loss": 3.4831, |
|
"step": 18088 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.980832815170288, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 18088 |
|
}, |
|
{ |
|
"epoch": 477.0, |
|
"learning_rate": 4.5999999999999995e-08, |
|
"loss": 3.4837, |
|
"step": 18126 |
|
}, |
|
{ |
|
"epoch": 477.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.980334758758545, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 6.526, |
|
"eval_steps_per_second": 1.631, |
|
"step": 18126 |
|
}, |
|
{ |
|
"epoch": 478.0, |
|
"learning_rate": 4.4e-08, |
|
"loss": 3.4843, |
|
"step": 18164 |
|
}, |
|
{ |
|
"epoch": 478.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9802615642547607, |
|
"eval_runtime": 0.6137, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 1.63, |
|
"step": 18164 |
|
}, |
|
{ |
|
"epoch": 479.0, |
|
"learning_rate": 4.2e-08, |
|
"loss": 3.4825, |
|
"step": 18202 |
|
}, |
|
{ |
|
"epoch": 479.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9801557064056396, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 18202 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"learning_rate": 4e-08, |
|
"loss": 3.4807, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.979966163635254, |
|
"eval_runtime": 0.6189, |
|
"eval_samples_per_second": 6.463, |
|
"eval_steps_per_second": 1.616, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 481.0, |
|
"learning_rate": 3.7999999999999996e-08, |
|
"loss": 3.4808, |
|
"step": 18278 |
|
}, |
|
{ |
|
"epoch": 481.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9796664714813232, |
|
"eval_runtime": 0.618, |
|
"eval_samples_per_second": 6.473, |
|
"eval_steps_per_second": 1.618, |
|
"step": 18278 |
|
}, |
|
{ |
|
"epoch": 482.0, |
|
"learning_rate": 3.6e-08, |
|
"loss": 3.4805, |
|
"step": 18316 |
|
}, |
|
{ |
|
"epoch": 482.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9796643257141113, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 18316 |
|
}, |
|
{ |
|
"epoch": 483.0, |
|
"learning_rate": 3.4e-08, |
|
"loss": 3.4818, |
|
"step": 18354 |
|
}, |
|
{ |
|
"epoch": 483.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.979565382003784, |
|
"eval_runtime": 0.6125, |
|
"eval_samples_per_second": 6.531, |
|
"eval_steps_per_second": 1.633, |
|
"step": 18354 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"learning_rate": 3.2e-08, |
|
"loss": 3.4821, |
|
"step": 18392 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9793689250946045, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 1.629, |
|
"step": 18392 |
|
}, |
|
{ |
|
"epoch": 485.0, |
|
"learning_rate": 3e-08, |
|
"loss": 3.4802, |
|
"step": 18430 |
|
}, |
|
{ |
|
"epoch": 485.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.979444980621338, |
|
"eval_runtime": 0.6144, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 1.628, |
|
"step": 18430 |
|
}, |
|
{ |
|
"epoch": 486.0, |
|
"learning_rate": 2.8e-08, |
|
"loss": 3.4805, |
|
"step": 18468 |
|
}, |
|
{ |
|
"epoch": 486.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.979588508605957, |
|
"eval_runtime": 0.6134, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 1.63, |
|
"step": 18468 |
|
}, |
|
{ |
|
"epoch": 487.0, |
|
"learning_rate": 2.5999999999999998e-08, |
|
"loss": 3.4831, |
|
"step": 18506 |
|
}, |
|
{ |
|
"epoch": 487.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9796085357666016, |
|
"eval_runtime": 0.6257, |
|
"eval_samples_per_second": 6.393, |
|
"eval_steps_per_second": 1.598, |
|
"step": 18506 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"learning_rate": 2.4e-08, |
|
"loss": 3.4846, |
|
"step": 18544 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.97976016998291, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 6.507, |
|
"eval_steps_per_second": 1.627, |
|
"step": 18544 |
|
}, |
|
{ |
|
"epoch": 489.0, |
|
"learning_rate": 2.2e-08, |
|
"loss": 3.4824, |
|
"step": 18582 |
|
}, |
|
{ |
|
"epoch": 489.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9797983169555664, |
|
"eval_runtime": 0.6255, |
|
"eval_samples_per_second": 6.395, |
|
"eval_steps_per_second": 1.599, |
|
"step": 18582 |
|
}, |
|
{ |
|
"epoch": 490.0, |
|
"learning_rate": 2e-08, |
|
"loss": 3.4807, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 490.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9798743724823, |
|
"eval_runtime": 0.6145, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 1.627, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 491.0, |
|
"learning_rate": 1.8e-08, |
|
"loss": 3.4809, |
|
"step": 18658 |
|
}, |
|
{ |
|
"epoch": 491.0, |
|
"eval_accuracy": 0.4239980449657869, |
|
"eval_loss": 3.9799368381500244, |
|
"eval_runtime": 0.6143, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.628, |
|
"step": 18658 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"learning_rate": 1.6e-08, |
|
"loss": 3.4801, |
|
"step": 18696 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9799206256866455, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 18696 |
|
}, |
|
{ |
|
"epoch": 493.0, |
|
"learning_rate": 1.4e-08, |
|
"loss": 3.479, |
|
"step": 18734 |
|
}, |
|
{ |
|
"epoch": 493.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9799153804779053, |
|
"eval_runtime": 0.6136, |
|
"eval_samples_per_second": 6.519, |
|
"eval_steps_per_second": 1.63, |
|
"step": 18734 |
|
}, |
|
{ |
|
"epoch": 494.0, |
|
"learning_rate": 1.2e-08, |
|
"loss": 3.48, |
|
"step": 18772 |
|
}, |
|
{ |
|
"epoch": 494.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9799201488494873, |
|
"eval_runtime": 0.6126, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.632, |
|
"step": 18772 |
|
}, |
|
{ |
|
"epoch": 495.0, |
|
"learning_rate": 1e-08, |
|
"loss": 3.4828, |
|
"step": 18810 |
|
}, |
|
{ |
|
"epoch": 495.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9799094200134277, |
|
"eval_runtime": 0.6243, |
|
"eval_samples_per_second": 6.407, |
|
"eval_steps_per_second": 1.602, |
|
"step": 18810 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"learning_rate": 8e-09, |
|
"loss": 3.4812, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.979907512664795, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 1.629, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 497.0, |
|
"learning_rate": 6e-09, |
|
"loss": 3.4798, |
|
"step": 18886 |
|
}, |
|
{ |
|
"epoch": 497.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.9798967838287354, |
|
"eval_runtime": 0.6148, |
|
"eval_samples_per_second": 6.506, |
|
"eval_steps_per_second": 1.626, |
|
"step": 18886 |
|
}, |
|
{ |
|
"epoch": 498.0, |
|
"learning_rate": 4e-09, |
|
"loss": 3.4866, |
|
"step": 18924 |
|
}, |
|
{ |
|
"epoch": 498.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.979888677597046, |
|
"eval_runtime": 0.614, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 1.629, |
|
"step": 18924 |
|
}, |
|
{ |
|
"epoch": 499.0, |
|
"learning_rate": 2e-09, |
|
"loss": 3.4785, |
|
"step": 18962 |
|
}, |
|
{ |
|
"epoch": 499.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.979886054992676, |
|
"eval_runtime": 0.6141, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 1.628, |
|
"step": 18962 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 0.0, |
|
"loss": 3.4893, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_accuracy": 0.4237536656891496, |
|
"eval_loss": 3.979886293411255, |
|
"eval_runtime": 0.6133, |
|
"eval_samples_per_second": 6.522, |
|
"eval_steps_per_second": 1.63, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 19000, |
|
"total_flos": 8.26946617344e+16, |
|
"train_loss": 3.948820646587171, |
|
"train_runtime": 16522.6397, |
|
"train_samples_per_second": 1.15, |
|
"train_steps_per_second": 1.15 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 19000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 500, |
|
"save_steps": 500, |
|
"total_flos": 8.26946617344e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|