|
{ |
|
"best_metric": 0.8638888888888889, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-inaturalist/checkpoint-140", |
|
"epoch": 80.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.03333333333333333, |
|
"eval_loss": 3.200693368911743, |
|
"eval_runtime": 3.9107, |
|
"eval_samples_per_second": 184.111, |
|
"eval_steps_per_second": 1.534, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.044444444444444446, |
|
"eval_loss": 3.188920021057129, |
|
"eval_runtime": 3.5624, |
|
"eval_samples_per_second": 202.11, |
|
"eval_steps_per_second": 1.684, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.06388888888888888, |
|
"eval_loss": 3.1746575832366943, |
|
"eval_runtime": 3.358, |
|
"eval_samples_per_second": 214.412, |
|
"eval_steps_per_second": 1.787, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.6863054633140564, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.1888, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.10972222222222222, |
|
"eval_loss": 3.144199848175049, |
|
"eval_runtime": 3.3224, |
|
"eval_samples_per_second": 216.708, |
|
"eval_steps_per_second": 1.806, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.14583333333333334, |
|
"eval_loss": 3.1183247566223145, |
|
"eval_runtime": 3.3659, |
|
"eval_samples_per_second": 213.913, |
|
"eval_steps_per_second": 1.783, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.21944444444444444, |
|
"eval_loss": 3.071033477783203, |
|
"eval_runtime": 3.1013, |
|
"eval_samples_per_second": 232.158, |
|
"eval_steps_per_second": 1.935, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.30416666666666664, |
|
"eval_loss": 3.0330796241760254, |
|
"eval_runtime": 3.0337, |
|
"eval_samples_per_second": 237.333, |
|
"eval_steps_per_second": 1.978, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.7356524467468262, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0673, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4388888888888889, |
|
"eval_loss": 2.96268367767334, |
|
"eval_runtime": 3.2316, |
|
"eval_samples_per_second": 222.8, |
|
"eval_steps_per_second": 1.857, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.49444444444444446, |
|
"eval_loss": 2.9109385013580322, |
|
"eval_runtime": 3.1222, |
|
"eval_samples_per_second": 230.605, |
|
"eval_steps_per_second": 1.922, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5763888888888888, |
|
"eval_loss": 2.8359875679016113, |
|
"eval_runtime": 3.0808, |
|
"eval_samples_per_second": 233.708, |
|
"eval_steps_per_second": 1.948, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_accuracy": 0.6055555555555555, |
|
"eval_loss": 2.780921459197998, |
|
"eval_runtime": 3.0798, |
|
"eval_samples_per_second": 233.781, |
|
"eval_steps_per_second": 1.948, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.76748126745224, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 2.8151, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6541666666666667, |
|
"eval_loss": 2.695770025253296, |
|
"eval_runtime": 3.3541, |
|
"eval_samples_per_second": 214.662, |
|
"eval_steps_per_second": 1.789, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.6763888888888889, |
|
"eval_loss": 2.640125036239624, |
|
"eval_runtime": 3.1085, |
|
"eval_samples_per_second": 231.622, |
|
"eval_steps_per_second": 1.93, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6944444444444444, |
|
"eval_loss": 2.558335304260254, |
|
"eval_runtime": 3.0528, |
|
"eval_samples_per_second": 235.852, |
|
"eval_steps_per_second": 1.965, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"eval_accuracy": 0.7083333333333334, |
|
"eval_loss": 2.503415822982788, |
|
"eval_runtime": 3.1925, |
|
"eval_samples_per_second": 225.525, |
|
"eval_steps_per_second": 1.879, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.8363860249519348, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 2.5143, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7347222222222223, |
|
"eval_loss": 2.4201643466949463, |
|
"eval_runtime": 3.0856, |
|
"eval_samples_per_second": 233.339, |
|
"eval_steps_per_second": 1.944, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.7375, |
|
"eval_loss": 2.3662188053131104, |
|
"eval_runtime": 3.0872, |
|
"eval_samples_per_second": 233.218, |
|
"eval_steps_per_second": 1.943, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7444444444444445, |
|
"eval_loss": 2.2883973121643066, |
|
"eval_runtime": 3.1262, |
|
"eval_samples_per_second": 230.314, |
|
"eval_steps_per_second": 1.919, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_accuracy": 0.7569444444444444, |
|
"eval_loss": 2.237414598464966, |
|
"eval_runtime": 3.0834, |
|
"eval_samples_per_second": 233.51, |
|
"eval_steps_per_second": 1.946, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.8517465591430664, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 2.2236, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 2.1632001399993896, |
|
"eval_runtime": 3.0769, |
|
"eval_samples_per_second": 234.001, |
|
"eval_steps_per_second": 1.95, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 2.1174519062042236, |
|
"eval_runtime": 3.0592, |
|
"eval_samples_per_second": 235.356, |
|
"eval_steps_per_second": 1.961, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7930555555555555, |
|
"eval_loss": 2.052760124206543, |
|
"eval_runtime": 3.21, |
|
"eval_samples_per_second": 224.301, |
|
"eval_steps_per_second": 1.869, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"eval_accuracy": 0.7958333333333333, |
|
"eval_loss": 2.009880304336548, |
|
"eval_runtime": 3.0534, |
|
"eval_samples_per_second": 235.801, |
|
"eval_steps_per_second": 1.965, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.8587987422943115, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.9677, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8013888888888889, |
|
"eval_loss": 1.9488461017608643, |
|
"eval_runtime": 3.1017, |
|
"eval_samples_per_second": 232.129, |
|
"eval_steps_per_second": 1.934, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.8097222222222222, |
|
"eval_loss": 1.9112929105758667, |
|
"eval_runtime": 3.0366, |
|
"eval_samples_per_second": 237.104, |
|
"eval_steps_per_second": 1.976, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8138888888888889, |
|
"eval_loss": 1.8581663370132446, |
|
"eval_runtime": 3.0592, |
|
"eval_samples_per_second": 235.359, |
|
"eval_steps_per_second": 1.961, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_accuracy": 0.8138888888888889, |
|
"eval_loss": 1.8241873979568481, |
|
"eval_runtime": 3.2249, |
|
"eval_samples_per_second": 223.26, |
|
"eval_steps_per_second": 1.861, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.8794375061988831, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 1.7467, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8111111111111111, |
|
"eval_loss": 1.7740373611450195, |
|
"eval_runtime": 3.1535, |
|
"eval_samples_per_second": 228.319, |
|
"eval_steps_per_second": 1.903, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.8055555555555556, |
|
"eval_loss": 1.7457906007766724, |
|
"eval_runtime": 3.0886, |
|
"eval_samples_per_second": 233.114, |
|
"eval_steps_per_second": 1.943, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8180555555555555, |
|
"eval_loss": 1.7013169527053833, |
|
"eval_runtime": 3.0478, |
|
"eval_samples_per_second": 236.234, |
|
"eval_steps_per_second": 1.969, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"eval_accuracy": 0.8194444444444444, |
|
"eval_loss": 1.6714116334915161, |
|
"eval_runtime": 3.1668, |
|
"eval_samples_per_second": 227.361, |
|
"eval_steps_per_second": 1.895, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 0.9110932946205139, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.5765, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8263888888888888, |
|
"eval_loss": 1.631589412689209, |
|
"eval_runtime": 3.3174, |
|
"eval_samples_per_second": 217.038, |
|
"eval_steps_per_second": 1.809, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.8236111111111111, |
|
"eval_loss": 1.6082607507705688, |
|
"eval_runtime": 3.107, |
|
"eval_samples_per_second": 231.734, |
|
"eval_steps_per_second": 1.931, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8291666666666667, |
|
"eval_loss": 1.5738186836242676, |
|
"eval_runtime": 3.1316, |
|
"eval_samples_per_second": 229.912, |
|
"eval_steps_per_second": 1.916, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"eval_accuracy": 0.8347222222222223, |
|
"eval_loss": 1.553105354309082, |
|
"eval_runtime": 3.2321, |
|
"eval_samples_per_second": 222.767, |
|
"eval_steps_per_second": 1.856, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.9191176891326904, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 1.4431, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8430555555555556, |
|
"eval_loss": 1.5228244066238403, |
|
"eval_runtime": 3.0649, |
|
"eval_samples_per_second": 234.921, |
|
"eval_steps_per_second": 1.958, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.8444444444444444, |
|
"eval_loss": 1.5046004056930542, |
|
"eval_runtime": 3.2047, |
|
"eval_samples_per_second": 224.669, |
|
"eval_steps_per_second": 1.872, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8472222222222222, |
|
"eval_loss": 1.4780092239379883, |
|
"eval_runtime": 3.0681, |
|
"eval_samples_per_second": 234.673, |
|
"eval_steps_per_second": 1.956, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"eval_accuracy": 0.8458333333333333, |
|
"eval_loss": 1.4607552289962769, |
|
"eval_runtime": 3.3287, |
|
"eval_samples_per_second": 216.303, |
|
"eval_steps_per_second": 1.803, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.896637499332428, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.3049, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8458333333333333, |
|
"eval_loss": 1.435728907585144, |
|
"eval_runtime": 3.1142, |
|
"eval_samples_per_second": 231.201, |
|
"eval_steps_per_second": 1.927, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 1.4187902212142944, |
|
"eval_runtime": 3.144, |
|
"eval_samples_per_second": 229.01, |
|
"eval_steps_per_second": 1.908, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.3949499130249023, |
|
"eval_runtime": 3.0808, |
|
"eval_samples_per_second": 233.709, |
|
"eval_steps_per_second": 1.948, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.3807573318481445, |
|
"eval_runtime": 3.1783, |
|
"eval_samples_per_second": 226.537, |
|
"eval_steps_per_second": 1.888, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 0.8933643102645874, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.2312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8458333333333333, |
|
"eval_loss": 1.3636168241500854, |
|
"eval_runtime": 3.356, |
|
"eval_samples_per_second": 214.543, |
|
"eval_steps_per_second": 1.788, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_accuracy": 0.8486111111111111, |
|
"eval_loss": 1.3513400554656982, |
|
"eval_runtime": 3.2597, |
|
"eval_samples_per_second": 220.878, |
|
"eval_steps_per_second": 1.841, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.3329037427902222, |
|
"eval_runtime": 3.0929, |
|
"eval_samples_per_second": 232.788, |
|
"eval_steps_per_second": 1.94, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.319313883781433, |
|
"eval_runtime": 3.1007, |
|
"eval_samples_per_second": 232.203, |
|
"eval_steps_per_second": 1.935, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 0.9017526507377625, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.1368, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.3025320768356323, |
|
"eval_runtime": 3.1234, |
|
"eval_samples_per_second": 230.52, |
|
"eval_steps_per_second": 1.921, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"eval_accuracy": 0.8541666666666666, |
|
"eval_loss": 1.2945308685302734, |
|
"eval_runtime": 3.0692, |
|
"eval_samples_per_second": 234.59, |
|
"eval_steps_per_second": 1.955, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8527777777777777, |
|
"eval_loss": 1.2819503545761108, |
|
"eval_runtime": 3.1162, |
|
"eval_samples_per_second": 231.052, |
|
"eval_steps_per_second": 1.925, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"eval_accuracy": 0.8569444444444444, |
|
"eval_loss": 1.2704639434814453, |
|
"eval_runtime": 3.2771, |
|
"eval_samples_per_second": 219.706, |
|
"eval_steps_per_second": 1.831, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 0.8930371999740601, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 1.0821, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8583333333333333, |
|
"eval_loss": 1.2615665197372437, |
|
"eval_runtime": 3.1151, |
|
"eval_samples_per_second": 231.134, |
|
"eval_steps_per_second": 1.926, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"eval_accuracy": 0.8555555555555555, |
|
"eval_loss": 1.2545220851898193, |
|
"eval_runtime": 3.0635, |
|
"eval_samples_per_second": 235.024, |
|
"eval_steps_per_second": 1.959, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8541666666666666, |
|
"eval_loss": 1.2422840595245361, |
|
"eval_runtime": 3.0988, |
|
"eval_samples_per_second": 232.348, |
|
"eval_steps_per_second": 1.936, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"eval_accuracy": 0.8597222222222223, |
|
"eval_loss": 1.233168125152588, |
|
"eval_runtime": 3.1982, |
|
"eval_samples_per_second": 225.127, |
|
"eval_steps_per_second": 1.876, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 0.8568278551101685, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.0232, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.2210274934768677, |
|
"eval_runtime": 3.355, |
|
"eval_samples_per_second": 214.604, |
|
"eval_steps_per_second": 1.788, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 1.2160966396331787, |
|
"eval_runtime": 3.127, |
|
"eval_samples_per_second": 230.252, |
|
"eval_steps_per_second": 1.919, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8569444444444444, |
|
"eval_loss": 1.209418535232544, |
|
"eval_runtime": 3.1685, |
|
"eval_samples_per_second": 227.239, |
|
"eval_steps_per_second": 1.894, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"eval_accuracy": 0.8541666666666666, |
|
"eval_loss": 1.205717921257019, |
|
"eval_runtime": 3.1918, |
|
"eval_samples_per_second": 225.576, |
|
"eval_steps_per_second": 1.88, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 0.8556333780288696, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.9814, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 1.1972941160202026, |
|
"eval_runtime": 3.1515, |
|
"eval_samples_per_second": 228.465, |
|
"eval_steps_per_second": 1.904, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"eval_accuracy": 0.8486111111111111, |
|
"eval_loss": 1.1918764114379883, |
|
"eval_runtime": 3.349, |
|
"eval_samples_per_second": 214.987, |
|
"eval_steps_per_second": 1.792, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 1.1825212240219116, |
|
"eval_runtime": 3.1401, |
|
"eval_samples_per_second": 229.295, |
|
"eval_steps_per_second": 1.911, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"eval_accuracy": 0.8597222222222223, |
|
"eval_loss": 1.179900050163269, |
|
"eval_runtime": 3.239, |
|
"eval_samples_per_second": 222.293, |
|
"eval_steps_per_second": 1.852, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 0.827712893486023, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.9415, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8597222222222223, |
|
"eval_loss": 1.1716293096542358, |
|
"eval_runtime": 3.1517, |
|
"eval_samples_per_second": 228.447, |
|
"eval_steps_per_second": 1.904, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 1.166538119316101, |
|
"eval_runtime": 3.22, |
|
"eval_samples_per_second": 223.601, |
|
"eval_steps_per_second": 1.863, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1611206531524658, |
|
"eval_runtime": 3.323, |
|
"eval_samples_per_second": 216.67, |
|
"eval_steps_per_second": 1.806, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 1.1600357294082642, |
|
"eval_runtime": 3.2253, |
|
"eval_samples_per_second": 223.233, |
|
"eval_steps_per_second": 1.86, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"grad_norm": 0.9616327285766602, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.9135, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1577341556549072, |
|
"eval_runtime": 3.1095, |
|
"eval_samples_per_second": 231.548, |
|
"eval_steps_per_second": 1.93, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1546900272369385, |
|
"eval_runtime": 3.2289, |
|
"eval_samples_per_second": 222.988, |
|
"eval_steps_per_second": 1.858, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1493217945098877, |
|
"eval_runtime": 3.103, |
|
"eval_samples_per_second": 232.033, |
|
"eval_steps_per_second": 1.934, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"eval_accuracy": 0.8611111111111112, |
|
"eval_loss": 1.1463639736175537, |
|
"eval_runtime": 3.3191, |
|
"eval_samples_per_second": 216.926, |
|
"eval_steps_per_second": 1.808, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 0.8919360637664795, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.8946, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8555555555555555, |
|
"eval_loss": 1.1423206329345703, |
|
"eval_runtime": 3.0106, |
|
"eval_samples_per_second": 239.152, |
|
"eval_steps_per_second": 1.993, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"eval_accuracy": 0.8611111111111112, |
|
"eval_loss": 1.1402053833007812, |
|
"eval_runtime": 3.4453, |
|
"eval_samples_per_second": 208.981, |
|
"eval_steps_per_second": 1.742, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8583333333333333, |
|
"eval_loss": 1.1375410556793213, |
|
"eval_runtime": 3.1143, |
|
"eval_samples_per_second": 231.188, |
|
"eval_steps_per_second": 1.927, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"eval_accuracy": 0.8597222222222223, |
|
"eval_loss": 1.1360384225845337, |
|
"eval_runtime": 3.2332, |
|
"eval_samples_per_second": 222.688, |
|
"eval_steps_per_second": 1.856, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"grad_norm": 0.9335024356842041, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.8866, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 1.134353756904602, |
|
"eval_runtime": 3.1331, |
|
"eval_samples_per_second": 229.808, |
|
"eval_steps_per_second": 1.915, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1333543062210083, |
|
"eval_runtime": 3.2199, |
|
"eval_samples_per_second": 223.611, |
|
"eval_steps_per_second": 1.863, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1323890686035156, |
|
"eval_runtime": 3.0815, |
|
"eval_samples_per_second": 233.653, |
|
"eval_steps_per_second": 1.947, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 78.8, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1320444345474243, |
|
"eval_runtime": 3.2638, |
|
"eval_samples_per_second": 220.604, |
|
"eval_steps_per_second": 1.838, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 0.8473469614982605, |
|
"learning_rate": 0.0, |
|
"loss": 0.8798, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8638888888888889, |
|
"eval_loss": 1.1318904161453247, |
|
"eval_runtime": 3.2096, |
|
"eval_samples_per_second": 224.325, |
|
"eval_steps_per_second": 1.869, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 200, |
|
"total_flos": 7.440697863438336e+18, |
|
"train_loss": 1.5909362745285034, |
|
"train_runtime": 1036.0381, |
|
"train_samples_per_second": 115.826, |
|
"train_steps_per_second": 0.193 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.440697863438336e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|