|
{ |
|
"best_metric": 0.7511737089201878, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-bootcamp/checkpoint-540", |
|
"epoch": 88.88888888888889, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_accuracy": 0.004694835680751174, |
|
"eval_loss": 4.284899711608887, |
|
"eval_runtime": 0.4639, |
|
"eval_samples_per_second": 459.187, |
|
"eval_steps_per_second": 15.091, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 4.8882575035095215, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 4.3139, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.925925925925926, |
|
"eval_accuracy": 0.03286384976525822, |
|
"eval_loss": 4.184599876403809, |
|
"eval_runtime": 0.4473, |
|
"eval_samples_per_second": 476.176, |
|
"eval_steps_per_second": 15.649, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 4.126770496368408, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 4.1651, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"eval_accuracy": 0.056338028169014086, |
|
"eval_loss": 4.058472156524658, |
|
"eval_runtime": 0.4494, |
|
"eval_samples_per_second": 473.953, |
|
"eval_steps_per_second": 15.576, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.06103286384976526, |
|
"eval_loss": 3.952671766281128, |
|
"eval_runtime": 0.4514, |
|
"eval_samples_per_second": 471.822, |
|
"eval_steps_per_second": 15.506, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 4.966624736785889, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.9272, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"eval_accuracy": 0.06103286384976526, |
|
"eval_loss": 3.8813464641571045, |
|
"eval_runtime": 0.456, |
|
"eval_samples_per_second": 467.068, |
|
"eval_steps_per_second": 15.35, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 5.925925925925926, |
|
"grad_norm": 5.827086448669434, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.7461, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.925925925925926, |
|
"eval_accuracy": 0.08450704225352113, |
|
"eval_loss": 3.7535641193389893, |
|
"eval_runtime": 0.4503, |
|
"eval_samples_per_second": 473.021, |
|
"eval_steps_per_second": 15.545, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.962962962962963, |
|
"eval_accuracy": 0.107981220657277, |
|
"eval_loss": 3.648564100265503, |
|
"eval_runtime": 0.4501, |
|
"eval_samples_per_second": 473.231, |
|
"eval_steps_per_second": 15.552, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"grad_norm": 6.3525919914245605, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 3.5254, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.13615023474178403, |
|
"eval_loss": 3.56028151512146, |
|
"eval_runtime": 0.4596, |
|
"eval_samples_per_second": 463.489, |
|
"eval_steps_per_second": 15.232, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 8.264792442321777, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3478, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_accuracy": 0.13615023474178403, |
|
"eval_loss": 3.4565625190734863, |
|
"eval_runtime": 0.456, |
|
"eval_samples_per_second": 467.076, |
|
"eval_steps_per_second": 15.35, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.925925925925926, |
|
"eval_accuracy": 0.15023474178403756, |
|
"eval_loss": 3.2985825538635254, |
|
"eval_runtime": 0.4601, |
|
"eval_samples_per_second": 462.915, |
|
"eval_steps_per_second": 15.213, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 10.37037037037037, |
|
"grad_norm": 10.286266326904297, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 3.0423, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.962962962962964, |
|
"eval_accuracy": 0.15492957746478872, |
|
"eval_loss": 3.2165536880493164, |
|
"eval_runtime": 0.4547, |
|
"eval_samples_per_second": 468.397, |
|
"eval_steps_per_second": 15.393, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 11.851851851851851, |
|
"grad_norm": 9.160584449768066, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 2.7931, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.215962441314554, |
|
"eval_loss": 3.0202882289886475, |
|
"eval_runtime": 0.4563, |
|
"eval_samples_per_second": 466.806, |
|
"eval_steps_per_second": 15.341, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 12.88888888888889, |
|
"eval_accuracy": 0.29107981220657275, |
|
"eval_loss": 2.8990509510040283, |
|
"eval_runtime": 0.4577, |
|
"eval_samples_per_second": 465.39, |
|
"eval_steps_per_second": 15.294, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 9.414092063903809, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 2.541, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 13.925925925925926, |
|
"eval_accuracy": 0.29107981220657275, |
|
"eval_loss": 2.794092893600464, |
|
"eval_runtime": 0.4564, |
|
"eval_samples_per_second": 466.714, |
|
"eval_steps_per_second": 15.338, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 14.814814814814815, |
|
"grad_norm": 9.282512664794922, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 2.3487, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 14.962962962962964, |
|
"eval_accuracy": 0.29107981220657275, |
|
"eval_loss": 2.733652353286743, |
|
"eval_runtime": 0.4626, |
|
"eval_samples_per_second": 460.475, |
|
"eval_steps_per_second": 15.133, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.36619718309859156, |
|
"eval_loss": 2.540092945098877, |
|
"eval_runtime": 0.4583, |
|
"eval_samples_per_second": 464.804, |
|
"eval_steps_per_second": 15.275, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 16.296296296296298, |
|
"grad_norm": 11.685836791992188, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 2.1043, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.88888888888889, |
|
"eval_accuracy": 0.38028169014084506, |
|
"eval_loss": 2.5088114738464355, |
|
"eval_runtime": 0.4588, |
|
"eval_samples_per_second": 464.257, |
|
"eval_steps_per_second": 15.257, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 11.694657325744629, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.8892, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 17.925925925925927, |
|
"eval_accuracy": 0.4131455399061033, |
|
"eval_loss": 2.3595798015594482, |
|
"eval_runtime": 0.4625, |
|
"eval_samples_per_second": 460.528, |
|
"eval_steps_per_second": 15.135, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 18.962962962962962, |
|
"eval_accuracy": 0.41784037558685444, |
|
"eval_loss": 2.317976713180542, |
|
"eval_runtime": 0.4557, |
|
"eval_samples_per_second": 467.42, |
|
"eval_steps_per_second": 15.361, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 19.25925925925926, |
|
"grad_norm": 9.399383544921875, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 1.7167, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4272300469483568, |
|
"eval_loss": 2.1819818019866943, |
|
"eval_runtime": 0.4566, |
|
"eval_samples_per_second": 466.514, |
|
"eval_steps_per_second": 15.331, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 20.74074074074074, |
|
"grad_norm": 8.570808410644531, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 1.5748, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 20.88888888888889, |
|
"eval_accuracy": 0.4413145539906103, |
|
"eval_loss": 2.0546934604644775, |
|
"eval_runtime": 0.4517, |
|
"eval_samples_per_second": 471.59, |
|
"eval_steps_per_second": 15.498, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 21.925925925925927, |
|
"eval_accuracy": 0.49295774647887325, |
|
"eval_loss": 1.9472370147705078, |
|
"eval_runtime": 0.4594, |
|
"eval_samples_per_second": 463.678, |
|
"eval_steps_per_second": 15.238, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 11.712135314941406, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.4052, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 22.962962962962962, |
|
"eval_accuracy": 0.48826291079812206, |
|
"eval_loss": 1.9053350687026978, |
|
"eval_runtime": 0.4637, |
|
"eval_samples_per_second": 459.37, |
|
"eval_steps_per_second": 15.097, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 23.703703703703702, |
|
"grad_norm": 11.016504287719727, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 1.2535, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5117370892018779, |
|
"eval_loss": 1.8178737163543701, |
|
"eval_runtime": 0.4639, |
|
"eval_samples_per_second": 459.108, |
|
"eval_steps_per_second": 15.088, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 24.88888888888889, |
|
"eval_accuracy": 0.5305164319248826, |
|
"eval_loss": 1.7599667310714722, |
|
"eval_runtime": 0.4596, |
|
"eval_samples_per_second": 463.449, |
|
"eval_steps_per_second": 15.231, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 25.185185185185187, |
|
"grad_norm": 10.845667839050293, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 1.1687, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 25.925925925925927, |
|
"eval_accuracy": 0.5492957746478874, |
|
"eval_loss": 1.6921813488006592, |
|
"eval_runtime": 0.4593, |
|
"eval_samples_per_second": 463.784, |
|
"eval_steps_per_second": 15.242, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 10.369373321533203, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.0719, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 26.962962962962962, |
|
"eval_accuracy": 0.5586854460093896, |
|
"eval_loss": 1.6075888872146606, |
|
"eval_runtime": 0.4618, |
|
"eval_samples_per_second": 461.281, |
|
"eval_steps_per_second": 15.159, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5586854460093896, |
|
"eval_loss": 1.5316015481948853, |
|
"eval_runtime": 0.4579, |
|
"eval_samples_per_second": 465.205, |
|
"eval_steps_per_second": 15.288, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 28.14814814814815, |
|
"grad_norm": 12.61413860321045, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 1.0577, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 28.88888888888889, |
|
"eval_accuracy": 0.5774647887323944, |
|
"eval_loss": 1.5365443229675293, |
|
"eval_runtime": 0.4629, |
|
"eval_samples_per_second": 460.173, |
|
"eval_steps_per_second": 15.123, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 29.62962962962963, |
|
"grad_norm": 10.047813415527344, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.9558, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 29.925925925925927, |
|
"eval_accuracy": 0.6291079812206573, |
|
"eval_loss": 1.448791742324829, |
|
"eval_runtime": 0.4616, |
|
"eval_samples_per_second": 461.458, |
|
"eval_steps_per_second": 15.165, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 30.962962962962962, |
|
"eval_accuracy": 0.6150234741784038, |
|
"eval_loss": 1.4185121059417725, |
|
"eval_runtime": 0.4583, |
|
"eval_samples_per_second": 464.754, |
|
"eval_steps_per_second": 15.274, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 31.11111111111111, |
|
"grad_norm": 8.121445655822754, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.8771, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6056338028169014, |
|
"eval_loss": 1.3905900716781616, |
|
"eval_runtime": 0.4637, |
|
"eval_samples_per_second": 459.353, |
|
"eval_steps_per_second": 15.096, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 32.592592592592595, |
|
"grad_norm": 9.448586463928223, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.8146, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 32.888888888888886, |
|
"eval_accuracy": 0.6150234741784038, |
|
"eval_loss": 1.3828094005584717, |
|
"eval_runtime": 0.4593, |
|
"eval_samples_per_second": 463.772, |
|
"eval_steps_per_second": 15.241, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 33.925925925925924, |
|
"eval_accuracy": 0.5821596244131455, |
|
"eval_loss": 1.3927448987960815, |
|
"eval_runtime": 0.4708, |
|
"eval_samples_per_second": 452.446, |
|
"eval_steps_per_second": 14.869, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 34.074074074074076, |
|
"grad_norm": 11.940274238586426, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.8228, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 34.96296296296296, |
|
"eval_accuracy": 0.6384976525821596, |
|
"eval_loss": 1.3035573959350586, |
|
"eval_runtime": 0.4602, |
|
"eval_samples_per_second": 462.817, |
|
"eval_steps_per_second": 15.21, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 35.55555555555556, |
|
"grad_norm": 10.717940330505371, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6878, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 1.2239941358566284, |
|
"eval_runtime": 0.4658, |
|
"eval_samples_per_second": 457.263, |
|
"eval_steps_per_second": 15.027, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 36.888888888888886, |
|
"eval_accuracy": 0.6713615023474179, |
|
"eval_loss": 1.2388455867767334, |
|
"eval_runtime": 0.4563, |
|
"eval_samples_per_second": 466.842, |
|
"eval_steps_per_second": 15.342, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 37.03703703703704, |
|
"grad_norm": 12.011445045471191, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.6471, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 37.925925925925924, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 1.1344724893569946, |
|
"eval_runtime": 0.4612, |
|
"eval_samples_per_second": 461.873, |
|
"eval_steps_per_second": 15.179, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 38.51851851851852, |
|
"grad_norm": 9.849960327148438, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.6102, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 38.96296296296296, |
|
"eval_accuracy": 0.6572769953051644, |
|
"eval_loss": 1.18145751953125, |
|
"eval_runtime": 0.4589, |
|
"eval_samples_per_second": 464.119, |
|
"eval_steps_per_second": 15.253, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 10.17436408996582, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.6599, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6525821596244131, |
|
"eval_loss": 1.1720404624938965, |
|
"eval_runtime": 0.4628, |
|
"eval_samples_per_second": 460.238, |
|
"eval_steps_per_second": 15.125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 40.888888888888886, |
|
"eval_accuracy": 0.6525821596244131, |
|
"eval_loss": 1.1335811614990234, |
|
"eval_runtime": 0.4583, |
|
"eval_samples_per_second": 464.802, |
|
"eval_steps_per_second": 15.275, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 41.48148148148148, |
|
"grad_norm": 8.4024019241333, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.5742, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 41.925925925925924, |
|
"eval_accuracy": 0.6713615023474179, |
|
"eval_loss": 1.0863432884216309, |
|
"eval_runtime": 0.4663, |
|
"eval_samples_per_second": 456.75, |
|
"eval_steps_per_second": 15.011, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 42.96296296296296, |
|
"grad_norm": 11.1184720993042, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.5478, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 42.96296296296296, |
|
"eval_accuracy": 0.6713615023474179, |
|
"eval_loss": 1.0909894704818726, |
|
"eval_runtime": 0.4543, |
|
"eval_samples_per_second": 468.842, |
|
"eval_steps_per_second": 15.408, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6619718309859155, |
|
"eval_loss": 1.074562668800354, |
|
"eval_runtime": 0.4575, |
|
"eval_samples_per_second": 465.544, |
|
"eval_steps_per_second": 15.3, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 44.44444444444444, |
|
"grad_norm": 8.84053897857666, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.557, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 44.888888888888886, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 1.0723693370819092, |
|
"eval_runtime": 0.4539, |
|
"eval_samples_per_second": 469.244, |
|
"eval_steps_per_second": 15.421, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 45.925925925925924, |
|
"grad_norm": 10.450238227844238, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.5753, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 45.925925925925924, |
|
"eval_accuracy": 0.7136150234741784, |
|
"eval_loss": 1.0107887983322144, |
|
"eval_runtime": 0.4643, |
|
"eval_samples_per_second": 458.736, |
|
"eval_steps_per_second": 15.076, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 46.96296296296296, |
|
"eval_accuracy": 0.6431924882629108, |
|
"eval_loss": 1.1295782327651978, |
|
"eval_runtime": 0.4606, |
|
"eval_samples_per_second": 462.398, |
|
"eval_steps_per_second": 15.196, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 47.407407407407405, |
|
"grad_norm": 9.998331069946289, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.5325, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 1.0361039638519287, |
|
"eval_runtime": 0.4659, |
|
"eval_samples_per_second": 457.192, |
|
"eval_steps_per_second": 15.025, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 48.888888888888886, |
|
"grad_norm": 7.716311454772949, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4349, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 48.888888888888886, |
|
"eval_accuracy": 0.6995305164319249, |
|
"eval_loss": 1.0237399339675903, |
|
"eval_runtime": 0.4605, |
|
"eval_samples_per_second": 462.536, |
|
"eval_steps_per_second": 15.201, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 49.925925925925924, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9789619445800781, |
|
"eval_runtime": 0.4655, |
|
"eval_samples_per_second": 457.587, |
|
"eval_steps_per_second": 15.038, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 50.370370370370374, |
|
"grad_norm": 11.521199226379395, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.447, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 50.96296296296296, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 1.0409153699874878, |
|
"eval_runtime": 0.4603, |
|
"eval_samples_per_second": 462.789, |
|
"eval_steps_per_second": 15.209, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 51.851851851851855, |
|
"grad_norm": 7.985869884490967, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.4502, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6713615023474179, |
|
"eval_loss": 1.0466963052749634, |
|
"eval_runtime": 0.4518, |
|
"eval_samples_per_second": 471.396, |
|
"eval_steps_per_second": 15.492, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 52.888888888888886, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9772961139678955, |
|
"eval_runtime": 0.4537, |
|
"eval_samples_per_second": 469.512, |
|
"eval_steps_per_second": 15.43, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 53.333333333333336, |
|
"grad_norm": 6.707178592681885, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.4345, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 53.925925925925924, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 0.9931105971336365, |
|
"eval_runtime": 0.4568, |
|
"eval_samples_per_second": 466.293, |
|
"eval_steps_per_second": 15.324, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 54.81481481481482, |
|
"grad_norm": 7.90395975112915, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.4557, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 54.96296296296296, |
|
"eval_accuracy": 0.7136150234741784, |
|
"eval_loss": 0.9685100317001343, |
|
"eval_runtime": 0.4567, |
|
"eval_samples_per_second": 466.339, |
|
"eval_steps_per_second": 15.326, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7370892018779343, |
|
"eval_loss": 0.9546511769294739, |
|
"eval_runtime": 0.4637, |
|
"eval_samples_per_second": 459.34, |
|
"eval_steps_per_second": 15.096, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 56.2962962962963, |
|
"grad_norm": 8.04978084564209, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.4109, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 56.888888888888886, |
|
"eval_accuracy": 0.6948356807511737, |
|
"eval_loss": 1.001466989517212, |
|
"eval_runtime": 0.4572, |
|
"eval_samples_per_second": 465.926, |
|
"eval_steps_per_second": 15.312, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 57.77777777777778, |
|
"grad_norm": 9.716485023498535, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.4406, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 57.925925925925924, |
|
"eval_accuracy": 0.7230046948356808, |
|
"eval_loss": 0.9410375952720642, |
|
"eval_runtime": 0.4627, |
|
"eval_samples_per_second": 460.386, |
|
"eval_steps_per_second": 15.13, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 58.96296296296296, |
|
"eval_accuracy": 0.6807511737089202, |
|
"eval_loss": 0.9764938950538635, |
|
"eval_runtime": 0.4625, |
|
"eval_samples_per_second": 460.496, |
|
"eval_steps_per_second": 15.134, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 59.25925925925926, |
|
"grad_norm": 7.345524311065674, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.4039, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7089201877934272, |
|
"eval_loss": 0.9505257606506348, |
|
"eval_runtime": 0.4624, |
|
"eval_samples_per_second": 460.681, |
|
"eval_steps_per_second": 15.14, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 60.74074074074074, |
|
"grad_norm": 9.50730037689209, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.396, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 60.888888888888886, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9539108872413635, |
|
"eval_runtime": 0.455, |
|
"eval_samples_per_second": 468.095, |
|
"eval_steps_per_second": 15.383, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 61.925925925925924, |
|
"eval_accuracy": 0.676056338028169, |
|
"eval_loss": 1.039088487625122, |
|
"eval_runtime": 0.461, |
|
"eval_samples_per_second": 462.085, |
|
"eval_steps_per_second": 15.186, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 62.22222222222222, |
|
"grad_norm": 7.710968971252441, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.3958, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 62.96296296296296, |
|
"eval_accuracy": 0.7136150234741784, |
|
"eval_loss": 0.9576095342636108, |
|
"eval_runtime": 0.4566, |
|
"eval_samples_per_second": 466.515, |
|
"eval_steps_per_second": 15.331, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 63.7037037037037, |
|
"grad_norm": 8.0850830078125, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.3763, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7230046948356808, |
|
"eval_loss": 0.937968909740448, |
|
"eval_runtime": 0.4607, |
|
"eval_samples_per_second": 462.317, |
|
"eval_steps_per_second": 15.194, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 64.88888888888889, |
|
"eval_accuracy": 0.7276995305164319, |
|
"eval_loss": 0.9363034963607788, |
|
"eval_runtime": 0.4601, |
|
"eval_samples_per_second": 462.97, |
|
"eval_steps_per_second": 15.215, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 65.18518518518519, |
|
"grad_norm": 8.919660568237305, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.3985, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 65.92592592592592, |
|
"eval_accuracy": 0.7089201877934272, |
|
"eval_loss": 0.9399967193603516, |
|
"eval_runtime": 0.4592, |
|
"eval_samples_per_second": 463.874, |
|
"eval_steps_per_second": 15.245, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 66.66666666666667, |
|
"grad_norm": 7.8991618156433105, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.3701, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 66.96296296296296, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9769109487533569, |
|
"eval_runtime": 0.4528, |
|
"eval_samples_per_second": 470.438, |
|
"eval_steps_per_second": 15.46, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7276995305164319, |
|
"eval_loss": 0.9604400992393494, |
|
"eval_runtime": 0.4581, |
|
"eval_samples_per_second": 464.916, |
|
"eval_steps_per_second": 15.279, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 68.14814814814815, |
|
"grad_norm": 8.74885082244873, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.3729, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 68.88888888888889, |
|
"eval_accuracy": 0.7089201877934272, |
|
"eval_loss": 0.9883025884628296, |
|
"eval_runtime": 0.4585, |
|
"eval_samples_per_second": 464.566, |
|
"eval_steps_per_second": 15.267, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 69.62962962962963, |
|
"grad_norm": 7.356982231140137, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.3958, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 69.92592592592592, |
|
"eval_accuracy": 0.7276995305164319, |
|
"eval_loss": 0.9516283273696899, |
|
"eval_runtime": 0.4626, |
|
"eval_samples_per_second": 460.471, |
|
"eval_steps_per_second": 15.133, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 70.96296296296296, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9252376556396484, |
|
"eval_runtime": 0.4582, |
|
"eval_samples_per_second": 464.872, |
|
"eval_steps_per_second": 15.277, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 71.11111111111111, |
|
"grad_norm": 7.72941255569458, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.359, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7136150234741784, |
|
"eval_loss": 0.9196011424064636, |
|
"eval_runtime": 0.4634, |
|
"eval_samples_per_second": 459.681, |
|
"eval_steps_per_second": 15.107, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 72.5925925925926, |
|
"grad_norm": 8.144768714904785, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.362, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 72.88888888888889, |
|
"eval_accuracy": 0.7230046948356808, |
|
"eval_loss": 0.9104124307632446, |
|
"eval_runtime": 0.4551, |
|
"eval_samples_per_second": 468.073, |
|
"eval_steps_per_second": 15.383, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 73.92592592592592, |
|
"eval_accuracy": 0.7136150234741784, |
|
"eval_loss": 0.9254885315895081, |
|
"eval_runtime": 0.458, |
|
"eval_samples_per_second": 465.078, |
|
"eval_steps_per_second": 15.284, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 74.07407407407408, |
|
"grad_norm": 8.89719009399414, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.353, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 74.96296296296296, |
|
"eval_accuracy": 0.7089201877934272, |
|
"eval_loss": 0.9358569979667664, |
|
"eval_runtime": 0.4614, |
|
"eval_samples_per_second": 461.641, |
|
"eval_steps_per_second": 15.171, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 75.55555555555556, |
|
"grad_norm": 10.696667671203613, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.345, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7230046948356808, |
|
"eval_loss": 0.9274182319641113, |
|
"eval_runtime": 0.4627, |
|
"eval_samples_per_second": 460.385, |
|
"eval_steps_per_second": 15.13, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 76.88888888888889, |
|
"eval_accuracy": 0.7370892018779343, |
|
"eval_loss": 0.9205775856971741, |
|
"eval_runtime": 0.4607, |
|
"eval_samples_per_second": 462.326, |
|
"eval_steps_per_second": 15.194, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 77.03703703703704, |
|
"grad_norm": 7.255312919616699, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.3414, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 77.92592592592592, |
|
"eval_accuracy": 0.7276995305164319, |
|
"eval_loss": 0.922869086265564, |
|
"eval_runtime": 0.4586, |
|
"eval_samples_per_second": 464.472, |
|
"eval_steps_per_second": 15.264, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 78.51851851851852, |
|
"grad_norm": 11.26202392578125, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.3298, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 78.96296296296296, |
|
"eval_accuracy": 0.7417840375586855, |
|
"eval_loss": 0.9102315902709961, |
|
"eval_runtime": 0.458, |
|
"eval_samples_per_second": 465.046, |
|
"eval_steps_per_second": 15.283, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 9.204602241516113, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.3394, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7511737089201878, |
|
"eval_loss": 0.8955033421516418, |
|
"eval_runtime": 0.457, |
|
"eval_samples_per_second": 466.115, |
|
"eval_steps_per_second": 15.318, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 80.88888888888889, |
|
"eval_accuracy": 0.7370892018779343, |
|
"eval_loss": 0.8956274390220642, |
|
"eval_runtime": 0.4576, |
|
"eval_samples_per_second": 465.457, |
|
"eval_steps_per_second": 15.297, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 81.48148148148148, |
|
"grad_norm": 5.736772537231445, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.3384, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 81.92592592592592, |
|
"eval_accuracy": 0.7276995305164319, |
|
"eval_loss": 0.8926992416381836, |
|
"eval_runtime": 0.4598, |
|
"eval_samples_per_second": 463.206, |
|
"eval_steps_per_second": 15.223, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 82.96296296296296, |
|
"grad_norm": 5.7758870124816895, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.3164, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 82.96296296296296, |
|
"eval_accuracy": 0.7417840375586855, |
|
"eval_loss": 0.8884522318840027, |
|
"eval_runtime": 0.4568, |
|
"eval_samples_per_second": 466.241, |
|
"eval_steps_per_second": 15.322, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7370892018779343, |
|
"eval_loss": 0.8940874338150024, |
|
"eval_runtime": 0.4584, |
|
"eval_samples_per_second": 464.708, |
|
"eval_steps_per_second": 15.272, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 84.44444444444444, |
|
"grad_norm": 8.919522285461426, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.3055, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 84.88888888888889, |
|
"eval_accuracy": 0.7417840375586855, |
|
"eval_loss": 0.8963488340377808, |
|
"eval_runtime": 0.4576, |
|
"eval_samples_per_second": 465.517, |
|
"eval_steps_per_second": 15.299, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 85.92592592592592, |
|
"grad_norm": 6.437784194946289, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.3355, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 85.92592592592592, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8992050290107727, |
|
"eval_runtime": 0.452, |
|
"eval_samples_per_second": 471.26, |
|
"eval_steps_per_second": 15.487, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 86.96296296296296, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8987627625465393, |
|
"eval_runtime": 0.462, |
|
"eval_samples_per_second": 461.085, |
|
"eval_steps_per_second": 15.153, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 87.4074074074074, |
|
"grad_norm": 7.563794136047363, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.3101, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8968843817710876, |
|
"eval_runtime": 0.4577, |
|
"eval_samples_per_second": 465.337, |
|
"eval_steps_per_second": 15.293, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 88.88888888888889, |
|
"grad_norm": 6.048923015594482, |
|
"learning_rate": 0.0, |
|
"loss": 0.3218, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 88.88888888888889, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.896327793598175, |
|
"eval_runtime": 0.4773, |
|
"eval_samples_per_second": 446.302, |
|
"eval_steps_per_second": 14.667, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 88.88888888888889, |
|
"step": 600, |
|
"total_flos": 1.8862166953495757e+18, |
|
"train_loss": 1.1065828450520834, |
|
"train_runtime": 510.0334, |
|
"train_samples_per_second": 167.048, |
|
"train_steps_per_second": 1.176 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8862166953495757e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|