|
{ |
|
"best_metric": 0.8166666666666667, |
|
"best_model_checkpoint": "swiftformer-xs-dmae-va-U5-42C\\checkpoint-418", |
|
"epoch": 72.25806451612904, |
|
"eval_steps": 500, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.3856309652328491, |
|
"eval_runtime": 1.1043, |
|
"eval_samples_per_second": 54.333, |
|
"eval_steps_per_second": 1.811, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.3855, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.3819035291671753, |
|
"eval_runtime": 0.7186, |
|
"eval_samples_per_second": 83.495, |
|
"eval_steps_per_second": 2.783, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.43333333333333335, |
|
"eval_loss": 1.3687236309051514, |
|
"eval_runtime": 0.7101, |
|
"eval_samples_per_second": 84.495, |
|
"eval_steps_per_second": 2.817, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 1.3742, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.31666666666666665, |
|
"eval_loss": 1.3188554048538208, |
|
"eval_runtime": 0.726, |
|
"eval_samples_per_second": 82.647, |
|
"eval_steps_per_second": 2.755, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 1.3004, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.2501040697097778, |
|
"eval_runtime": 0.725, |
|
"eval_samples_per_second": 82.755, |
|
"eval_steps_per_second": 2.759, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.2268178462982178, |
|
"eval_runtime": 0.7099, |
|
"eval_samples_per_second": 84.517, |
|
"eval_steps_per_second": 2.817, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 1.1716, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.211478352546692, |
|
"eval_runtime": 0.7071, |
|
"eval_samples_per_second": 84.855, |
|
"eval_steps_per_second": 2.828, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00019841269841269844, |
|
"loss": 1.0686, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.2243305444717407, |
|
"eval_runtime": 0.7126, |
|
"eval_samples_per_second": 84.197, |
|
"eval_steps_per_second": 2.807, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1432182788848877, |
|
"eval_runtime": 0.7953, |
|
"eval_samples_per_second": 75.447, |
|
"eval_steps_per_second": 2.515, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.00019365079365079365, |
|
"loss": 0.9764, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.020477294921875, |
|
"eval_runtime": 0.7686, |
|
"eval_samples_per_second": 78.061, |
|
"eval_steps_per_second": 2.602, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 0.00018888888888888888, |
|
"loss": 0.873, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.9721332788467407, |
|
"eval_runtime": 0.8664, |
|
"eval_samples_per_second": 69.254, |
|
"eval_steps_per_second": 2.308, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 0.9220641851425171, |
|
"eval_runtime": 0.7382, |
|
"eval_samples_per_second": 81.278, |
|
"eval_steps_per_second": 2.709, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.00018412698412698412, |
|
"loss": 0.7822, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 0.8593236207962036, |
|
"eval_runtime": 1.1748, |
|
"eval_samples_per_second": 51.071, |
|
"eval_steps_per_second": 1.702, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.00017936507936507938, |
|
"loss": 0.664, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.7774909734725952, |
|
"eval_runtime": 0.8265, |
|
"eval_samples_per_second": 72.595, |
|
"eval_steps_per_second": 2.42, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 0.8117440342903137, |
|
"eval_runtime": 0.873, |
|
"eval_samples_per_second": 68.731, |
|
"eval_steps_per_second": 2.291, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.00017460317460317462, |
|
"loss": 0.5439, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.7552784085273743, |
|
"eval_runtime": 0.8067, |
|
"eval_samples_per_second": 74.373, |
|
"eval_steps_per_second": 2.479, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.669671356678009, |
|
"eval_runtime": 0.7809, |
|
"eval_samples_per_second": 76.836, |
|
"eval_steps_per_second": 2.561, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.00016984126984126986, |
|
"loss": 0.496, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6479821801185608, |
|
"eval_runtime": 0.8358, |
|
"eval_samples_per_second": 71.789, |
|
"eval_steps_per_second": 2.393, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 0.0001650793650793651, |
|
"loss": 0.4563, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7115061283111572, |
|
"eval_runtime": 0.7213, |
|
"eval_samples_per_second": 83.184, |
|
"eval_steps_per_second": 2.773, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6776978373527527, |
|
"eval_runtime": 1.1968, |
|
"eval_samples_per_second": 50.132, |
|
"eval_steps_per_second": 1.671, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 0.00016031746031746033, |
|
"loss": 0.3831, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.641558825969696, |
|
"eval_runtime": 0.7539, |
|
"eval_samples_per_second": 79.588, |
|
"eval_steps_per_second": 2.653, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.339, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.7040281891822815, |
|
"eval_runtime": 0.7707, |
|
"eval_samples_per_second": 77.852, |
|
"eval_steps_per_second": 2.595, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6858527064323425, |
|
"eval_runtime": 0.7222, |
|
"eval_samples_per_second": 83.082, |
|
"eval_steps_per_second": 2.769, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 0.0001507936507936508, |
|
"loss": 0.3033, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6012035608291626, |
|
"eval_runtime": 0.8804, |
|
"eval_samples_per_second": 68.154, |
|
"eval_steps_per_second": 2.272, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 0.00014603174603174603, |
|
"loss": 0.2655, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.5439515709877014, |
|
"eval_runtime": 0.8773, |
|
"eval_samples_per_second": 68.39, |
|
"eval_steps_per_second": 2.28, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6173577904701233, |
|
"eval_runtime": 0.755, |
|
"eval_samples_per_second": 79.471, |
|
"eval_steps_per_second": 2.649, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 0.0001412698412698413, |
|
"loss": 0.2269, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.5745389461517334, |
|
"eval_runtime": 0.7541, |
|
"eval_samples_per_second": 79.562, |
|
"eval_steps_per_second": 2.652, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"learning_rate": 0.0001365079365079365, |
|
"loss": 0.2472, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.5688391327857971, |
|
"eval_runtime": 0.7386, |
|
"eval_samples_per_second": 81.235, |
|
"eval_steps_per_second": 2.708, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6578179597854614, |
|
"eval_runtime": 0.7981, |
|
"eval_samples_per_second": 75.174, |
|
"eval_steps_per_second": 2.506, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 0.00013174603174603174, |
|
"loss": 0.2004, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.5811216831207275, |
|
"eval_runtime": 0.7017, |
|
"eval_samples_per_second": 85.512, |
|
"eval_steps_per_second": 2.85, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 0.00012698412698412698, |
|
"loss": 0.2099, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6672316193580627, |
|
"eval_runtime": 0.9579, |
|
"eval_samples_per_second": 62.635, |
|
"eval_steps_per_second": 2.088, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5926868319511414, |
|
"eval_runtime": 0.7151, |
|
"eval_samples_per_second": 83.907, |
|
"eval_steps_per_second": 2.797, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"learning_rate": 0.00012222222222222224, |
|
"loss": 0.1834, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6193079352378845, |
|
"eval_runtime": 0.7137, |
|
"eval_samples_per_second": 84.073, |
|
"eval_steps_per_second": 2.802, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7505124807357788, |
|
"eval_runtime": 0.8955, |
|
"eval_samples_per_second": 67.004, |
|
"eval_steps_per_second": 2.233, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"learning_rate": 0.00011746031746031746, |
|
"loss": 0.2248, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7730365991592407, |
|
"eval_runtime": 0.7499, |
|
"eval_samples_per_second": 80.008, |
|
"eval_steps_per_second": 2.667, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"learning_rate": 0.0001126984126984127, |
|
"loss": 0.1571, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6211021542549133, |
|
"eval_runtime": 0.9402, |
|
"eval_samples_per_second": 63.814, |
|
"eval_steps_per_second": 2.127, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6227646470069885, |
|
"eval_runtime": 0.747, |
|
"eval_samples_per_second": 80.323, |
|
"eval_steps_per_second": 2.677, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 0.00010793650793650794, |
|
"loss": 0.1983, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6088116765022278, |
|
"eval_runtime": 0.6721, |
|
"eval_samples_per_second": 89.274, |
|
"eval_steps_per_second": 2.976, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"learning_rate": 0.00010317460317460319, |
|
"loss": 0.1629, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 38.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7009025812149048, |
|
"eval_runtime": 0.6916, |
|
"eval_samples_per_second": 86.756, |
|
"eval_steps_per_second": 2.892, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7284848690032959, |
|
"eval_runtime": 0.6756, |
|
"eval_samples_per_second": 88.806, |
|
"eval_steps_per_second": 2.96, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 40.26, |
|
"learning_rate": 9.841269841269841e-05, |
|
"loss": 0.1547, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 40.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6401079297065735, |
|
"eval_runtime": 1.0498, |
|
"eval_samples_per_second": 57.152, |
|
"eval_steps_per_second": 1.905, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 41.81, |
|
"learning_rate": 9.365079365079366e-05, |
|
"loss": 0.1548, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 41.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.6122706532478333, |
|
"eval_runtime": 0.7047, |
|
"eval_samples_per_second": 85.142, |
|
"eval_steps_per_second": 2.838, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 42.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6316841244697571, |
|
"eval_runtime": 0.7237, |
|
"eval_samples_per_second": 82.911, |
|
"eval_steps_per_second": 2.764, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 43.35, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.1566, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7579439282417297, |
|
"eval_runtime": 0.6901, |
|
"eval_samples_per_second": 86.944, |
|
"eval_steps_per_second": 2.898, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 44.9, |
|
"learning_rate": 8.412698412698413e-05, |
|
"loss": 0.1361, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 44.9, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6652740836143494, |
|
"eval_runtime": 0.7005, |
|
"eval_samples_per_second": 85.657, |
|
"eval_steps_per_second": 2.855, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 45.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7401434779167175, |
|
"eval_runtime": 0.6837, |
|
"eval_samples_per_second": 87.759, |
|
"eval_steps_per_second": 2.925, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 7.936507936507937e-05, |
|
"loss": 0.1273, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 46.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8404071927070618, |
|
"eval_runtime": 0.852, |
|
"eval_samples_per_second": 70.419, |
|
"eval_steps_per_second": 2.347, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 7.460317460317461e-05, |
|
"loss": 0.1312, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8388133645057678, |
|
"eval_runtime": 0.7625, |
|
"eval_samples_per_second": 78.691, |
|
"eval_steps_per_second": 2.623, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 48.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.7823358774185181, |
|
"eval_runtime": 0.7984, |
|
"eval_samples_per_second": 75.152, |
|
"eval_steps_per_second": 2.505, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 49.55, |
|
"learning_rate": 6.984126984126984e-05, |
|
"loss": 0.1307, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 49.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6979826092720032, |
|
"eval_runtime": 0.9296, |
|
"eval_samples_per_second": 64.545, |
|
"eval_steps_per_second": 2.151, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 50.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7589060664176941, |
|
"eval_runtime": 0.9421, |
|
"eval_samples_per_second": 63.685, |
|
"eval_steps_per_second": 2.123, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 51.1, |
|
"learning_rate": 6.507936507936509e-05, |
|
"loss": 0.1061, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.664362907409668, |
|
"eval_runtime": 0.9581, |
|
"eval_samples_per_second": 62.625, |
|
"eval_steps_per_second": 2.088, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 52.65, |
|
"learning_rate": 6.0317460317460316e-05, |
|
"loss": 0.1186, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7056966423988342, |
|
"eval_runtime": 0.7627, |
|
"eval_samples_per_second": 78.67, |
|
"eval_steps_per_second": 2.622, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 53.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6744123697280884, |
|
"eval_runtime": 0.7522, |
|
"eval_samples_per_second": 79.767, |
|
"eval_steps_per_second": 2.659, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 54.19, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.1108, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 54.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6327721476554871, |
|
"eval_runtime": 0.7919, |
|
"eval_samples_per_second": 75.763, |
|
"eval_steps_per_second": 2.525, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 55.74, |
|
"learning_rate": 5.0793650793650794e-05, |
|
"loss": 0.1014, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.6401543021202087, |
|
"eval_runtime": 0.7047, |
|
"eval_samples_per_second": 85.145, |
|
"eval_steps_per_second": 2.838, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 56.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6631258726119995, |
|
"eval_runtime": 0.7567, |
|
"eval_samples_per_second": 79.292, |
|
"eval_steps_per_second": 2.643, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 57.29, |
|
"learning_rate": 4.603174603174603e-05, |
|
"loss": 0.1082, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7001372575759888, |
|
"eval_runtime": 0.7863, |
|
"eval_samples_per_second": 76.308, |
|
"eval_steps_per_second": 2.544, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 58.84, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 0.1118, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7898235321044922, |
|
"eval_runtime": 0.8339, |
|
"eval_samples_per_second": 71.951, |
|
"eval_steps_per_second": 2.398, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7644184231758118, |
|
"eval_runtime": 0.916, |
|
"eval_samples_per_second": 65.499, |
|
"eval_steps_per_second": 2.183, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 60.39, |
|
"learning_rate": 3.650793650793651e-05, |
|
"loss": 0.1051, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 60.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7767077088356018, |
|
"eval_runtime": 0.7368, |
|
"eval_samples_per_second": 81.432, |
|
"eval_steps_per_second": 2.714, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 61.94, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.0979, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 61.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7440354824066162, |
|
"eval_runtime": 0.8613, |
|
"eval_samples_per_second": 69.665, |
|
"eval_steps_per_second": 2.322, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 62.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6826977133750916, |
|
"eval_runtime": 0.8727, |
|
"eval_samples_per_second": 68.75, |
|
"eval_steps_per_second": 2.292, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 63.48, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 0.0834, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7008056640625, |
|
"eval_runtime": 1.0337, |
|
"eval_samples_per_second": 58.046, |
|
"eval_steps_per_second": 1.935, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 64.9, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7242893576622009, |
|
"eval_runtime": 0.9008, |
|
"eval_samples_per_second": 66.608, |
|
"eval_steps_per_second": 2.22, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 65.03, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0963, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 65.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7655950784683228, |
|
"eval_runtime": 0.7846, |
|
"eval_samples_per_second": 76.471, |
|
"eval_steps_per_second": 2.549, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 66.58, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 0.0989, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 66.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.733224093914032, |
|
"eval_runtime": 0.6942, |
|
"eval_samples_per_second": 86.424, |
|
"eval_steps_per_second": 2.881, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7623672485351562, |
|
"eval_runtime": 0.6962, |
|
"eval_samples_per_second": 86.187, |
|
"eval_steps_per_second": 2.873, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 68.13, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 0.107, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 68.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7291642427444458, |
|
"eval_runtime": 0.8099, |
|
"eval_samples_per_second": 74.08, |
|
"eval_steps_per_second": 2.469, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 69.68, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.0987, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 69.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7168775796890259, |
|
"eval_runtime": 0.9171, |
|
"eval_samples_per_second": 65.421, |
|
"eval_steps_per_second": 2.181, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 70.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.746231734752655, |
|
"eval_runtime": 0.7381, |
|
"eval_samples_per_second": 81.293, |
|
"eval_steps_per_second": 2.71, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 71.23, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.0956, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6656435132026672, |
|
"eval_runtime": 0.7048, |
|
"eval_samples_per_second": 85.132, |
|
"eval_steps_per_second": 2.838, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 72.26, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6873000860214233, |
|
"eval_runtime": 0.744, |
|
"eval_samples_per_second": 80.641, |
|
"eval_steps_per_second": 2.688, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 72.26, |
|
"step": 560, |
|
"total_flos": 1.9293876649171354e+17, |
|
"train_loss": 0.3574366893087115, |
|
"train_runtime": 357.0027, |
|
"train_samples_per_second": 218.262, |
|
"train_steps_per_second": 1.569 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 80, |
|
"save_steps": 500, |
|
"total_flos": 1.9293876649171354e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|