|
{ |
|
"best_metric": 0.9375, |
|
"best_model_checkpoint": "videomae-base-finetuned-ucf101-subset_fhbh/checkpoint-638", |
|
"epoch": 24.0020350877193, |
|
"eval_steps": 500, |
|
"global_step": 1450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003508771929824561, |
|
"grad_norm": 5.952354907989502, |
|
"learning_rate": 1.7543859649122808e-07, |
|
"loss": 0.745, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0007017543859649122, |
|
"grad_norm": 4.616081714630127, |
|
"learning_rate": 3.5087719298245616e-07, |
|
"loss": 0.6751, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0010526315789473684, |
|
"grad_norm": 14.299074172973633, |
|
"learning_rate": 5.263157894736843e-07, |
|
"loss": 0.7311, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0014035087719298245, |
|
"grad_norm": 9.126326560974121, |
|
"learning_rate": 7.017543859649123e-07, |
|
"loss": 0.6957, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0017543859649122807, |
|
"grad_norm": 6.692790985107422, |
|
"learning_rate": 8.771929824561404e-07, |
|
"loss": 0.7533, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0020350877192982456, |
|
"eval_accuracy": 0.5416666666666666, |
|
"eval_loss": 0.677791178226471, |
|
"eval_runtime": 78.366, |
|
"eval_samples_per_second": 0.613, |
|
"eval_steps_per_second": 0.153, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.0000701754385966, |
|
"grad_norm": 6.502946853637695, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 0.7694, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0004210526315789, |
|
"grad_norm": 11.516799926757812, |
|
"learning_rate": 1.2280701754385965e-06, |
|
"loss": 0.7382, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0007719298245614, |
|
"grad_norm": 7.619742393493652, |
|
"learning_rate": 1.4035087719298246e-06, |
|
"loss": 0.6912, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.001122807017544, |
|
"grad_norm": 5.542720794677734, |
|
"learning_rate": 1.5789473684210528e-06, |
|
"loss": 0.7054, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0014736842105263, |
|
"grad_norm": 7.172524929046631, |
|
"learning_rate": 1.7543859649122807e-06, |
|
"loss": 0.7533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0018245614035088, |
|
"grad_norm": 6.668615341186523, |
|
"learning_rate": 1.929824561403509e-06, |
|
"loss": 0.7229, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0020350877192983, |
|
"eval_accuracy": 0.5416666666666666, |
|
"eval_loss": 0.663836658000946, |
|
"eval_runtime": 77.9477, |
|
"eval_samples_per_second": 0.616, |
|
"eval_steps_per_second": 0.154, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.000140350877193, |
|
"grad_norm": 4.909543991088867, |
|
"learning_rate": 2.105263157894737e-06, |
|
"loss": 0.6922, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0004912280701754, |
|
"grad_norm": 9.0471830368042, |
|
"learning_rate": 2.2807017543859652e-06, |
|
"loss": 0.6736, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0008421052631578, |
|
"grad_norm": 6.69089412689209, |
|
"learning_rate": 2.456140350877193e-06, |
|
"loss": 0.6865, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0011929824561405, |
|
"grad_norm": 9.476597785949707, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 0.6844, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.001543859649123, |
|
"grad_norm": 7.067219257354736, |
|
"learning_rate": 2.8070175438596493e-06, |
|
"loss": 0.6768, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.001894736842105, |
|
"grad_norm": 5.748457908630371, |
|
"learning_rate": 2.9824561403508774e-06, |
|
"loss": 0.6827, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0020350877192983, |
|
"eval_accuracy": 0.6041666666666666, |
|
"eval_loss": 0.6515334248542786, |
|
"eval_runtime": 77.9754, |
|
"eval_samples_per_second": 0.616, |
|
"eval_steps_per_second": 0.154, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 3.0002105263157897, |
|
"grad_norm": 8.415090560913086, |
|
"learning_rate": 3.1578947368421056e-06, |
|
"loss": 0.7035, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.000561403508772, |
|
"grad_norm": 7.755239963531494, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.712, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0009122807017543, |
|
"grad_norm": 11.437898635864258, |
|
"learning_rate": 3.5087719298245615e-06, |
|
"loss": 0.6409, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0012631578947366, |
|
"grad_norm": 6.896209239959717, |
|
"learning_rate": 3.6842105263157892e-06, |
|
"loss": 0.6862, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0016140350877194, |
|
"grad_norm": 5.764392852783203, |
|
"learning_rate": 3.859649122807018e-06, |
|
"loss": 0.6459, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0019649122807017, |
|
"grad_norm": 8.806387901306152, |
|
"learning_rate": 4.035087719298246e-06, |
|
"loss": 0.7322, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0020350877192983, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6666872501373291, |
|
"eval_runtime": 78.325, |
|
"eval_samples_per_second": 0.613, |
|
"eval_steps_per_second": 0.153, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 4.000280701754386, |
|
"grad_norm": 11.507173538208008, |
|
"learning_rate": 4.210526315789474e-06, |
|
"loss": 0.6937, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.0006315789473685, |
|
"grad_norm": 7.351099491119385, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.6292, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.000982456140351, |
|
"grad_norm": 4.936241149902344, |
|
"learning_rate": 4.5614035087719304e-06, |
|
"loss": 0.6041, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.001333333333333, |
|
"grad_norm": 10.265213012695312, |
|
"learning_rate": 4.736842105263159e-06, |
|
"loss": 0.6616, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.0016842105263155, |
|
"grad_norm": 14.022355079650879, |
|
"learning_rate": 4.912280701754386e-06, |
|
"loss": 0.6489, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.002035087719298, |
|
"grad_norm": 14.538658142089844, |
|
"learning_rate": 5.087719298245614e-06, |
|
"loss": 0.6552, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.002035087719298, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6378026604652405, |
|
"eval_runtime": 78.4115, |
|
"eval_samples_per_second": 0.612, |
|
"eval_steps_per_second": 0.153, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.000350877192982, |
|
"grad_norm": 6.908311367034912, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.6183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.000701754385965, |
|
"grad_norm": 6.211957931518555, |
|
"learning_rate": 5.43859649122807e-06, |
|
"loss": 0.5759, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.001052631578947, |
|
"grad_norm": 4.951029300689697, |
|
"learning_rate": 5.6140350877192985e-06, |
|
"loss": 0.6144, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.00140350877193, |
|
"grad_norm": 8.593265533447266, |
|
"learning_rate": 5.789473684210527e-06, |
|
"loss": 0.5619, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.0017543859649125, |
|
"grad_norm": 19.80694007873535, |
|
"learning_rate": 5.964912280701755e-06, |
|
"loss": 0.4691, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.002035087719298, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5537357926368713, |
|
"eval_runtime": 80.2663, |
|
"eval_samples_per_second": 0.598, |
|
"eval_steps_per_second": 0.15, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 6.000070175438596, |
|
"grad_norm": 19.27092170715332, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.5575, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.000421052631579, |
|
"grad_norm": 14.520448684692383, |
|
"learning_rate": 6.315789473684211e-06, |
|
"loss": 0.5209, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.000771929824562, |
|
"grad_norm": 13.577587127685547, |
|
"learning_rate": 6.4912280701754385e-06, |
|
"loss": 0.4873, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.001122807017544, |
|
"grad_norm": 2.4672834873199463, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3996, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.001473684210526, |
|
"grad_norm": 29.06943702697754, |
|
"learning_rate": 6.842105263157896e-06, |
|
"loss": 0.58, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.001824561403509, |
|
"grad_norm": 10.214743614196777, |
|
"learning_rate": 7.017543859649123e-06, |
|
"loss": 0.6845, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.002035087719298, |
|
"eval_accuracy": 0.7083333333333334, |
|
"eval_loss": 0.6998243927955627, |
|
"eval_runtime": 81.5316, |
|
"eval_samples_per_second": 0.589, |
|
"eval_steps_per_second": 0.147, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 7.000140350877193, |
|
"grad_norm": 72.12657928466797, |
|
"learning_rate": 7.192982456140351e-06, |
|
"loss": 0.6733, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.000491228070175, |
|
"grad_norm": 5.446975231170654, |
|
"learning_rate": 7.3684210526315784e-06, |
|
"loss": 0.2873, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.000842105263158, |
|
"grad_norm": 9.24228286743164, |
|
"learning_rate": 7.5438596491228074e-06, |
|
"loss": 0.4578, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.00119298245614, |
|
"grad_norm": 1.2333711385726929, |
|
"learning_rate": 7.719298245614036e-06, |
|
"loss": 0.3516, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.001543859649122, |
|
"grad_norm": 6.666906833648682, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 0.5434, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.001894736842106, |
|
"grad_norm": 18.284526824951172, |
|
"learning_rate": 8.070175438596492e-06, |
|
"loss": 0.6754, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.002035087719298, |
|
"eval_accuracy": 0.875, |
|
"eval_loss": 0.36466991901397705, |
|
"eval_runtime": 80.8792, |
|
"eval_samples_per_second": 0.593, |
|
"eval_steps_per_second": 0.148, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 8.00021052631579, |
|
"grad_norm": 8.833359718322754, |
|
"learning_rate": 8.245614035087721e-06, |
|
"loss": 0.4877, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.000561403508772, |
|
"grad_norm": 10.950183868408203, |
|
"learning_rate": 8.421052631578948e-06, |
|
"loss": 0.3044, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.000912280701755, |
|
"grad_norm": 2.037674903869629, |
|
"learning_rate": 8.596491228070176e-06, |
|
"loss": 0.2232, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.001263157894737, |
|
"grad_norm": 78.8741455078125, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.1771, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.00161403508772, |
|
"grad_norm": 90.6770248413086, |
|
"learning_rate": 8.947368421052632e-06, |
|
"loss": 1.1209, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.001964912280702, |
|
"grad_norm": 39.13031768798828, |
|
"learning_rate": 9.122807017543861e-06, |
|
"loss": 0.8425, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.002035087719298, |
|
"eval_accuracy": 0.5416666666666666, |
|
"eval_loss": 0.6199241876602173, |
|
"eval_runtime": 81.9922, |
|
"eval_samples_per_second": 0.585, |
|
"eval_steps_per_second": 0.146, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 9.000280701754386, |
|
"grad_norm": 17.226152420043945, |
|
"learning_rate": 9.298245614035088e-06, |
|
"loss": 0.7695, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.000631578947369, |
|
"grad_norm": 12.632246971130371, |
|
"learning_rate": 9.473684210526317e-06, |
|
"loss": 0.5423, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.00098245614035, |
|
"grad_norm": 7.4788336753845215, |
|
"learning_rate": 9.649122807017545e-06, |
|
"loss": 0.6734, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.001333333333333, |
|
"grad_norm": 32.823486328125, |
|
"learning_rate": 9.824561403508772e-06, |
|
"loss": 0.4033, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.001684210526316, |
|
"grad_norm": 5.6088480949401855, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2009, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.002035087719298, |
|
"grad_norm": 0.8267044425010681, |
|
"learning_rate": 1.0175438596491228e-05, |
|
"loss": 0.2276, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.002035087719298, |
|
"eval_accuracy": 0.7291666666666666, |
|
"eval_loss": 0.9983854293823242, |
|
"eval_runtime": 81.8828, |
|
"eval_samples_per_second": 0.586, |
|
"eval_steps_per_second": 0.147, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.000350877192982, |
|
"grad_norm": 0.6918083429336548, |
|
"learning_rate": 1.0350877192982457e-05, |
|
"loss": 0.4027, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.000701754385965, |
|
"grad_norm": 12.070817947387695, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.1868, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.001052631578947, |
|
"grad_norm": 11.899476051330566, |
|
"learning_rate": 1.0701754385964913e-05, |
|
"loss": 0.8328, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.00140350877193, |
|
"grad_norm": 18.76070213317871, |
|
"learning_rate": 1.087719298245614e-05, |
|
"loss": 0.4753, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.001754385964912, |
|
"grad_norm": 15.813506126403809, |
|
"learning_rate": 1.1052631578947368e-05, |
|
"loss": 0.3953, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.002035087719298, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.3595670759677887, |
|
"eval_runtime": 84.5422, |
|
"eval_samples_per_second": 0.568, |
|
"eval_steps_per_second": 0.142, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 11.000070175438596, |
|
"grad_norm": 2.381981372833252, |
|
"learning_rate": 1.1228070175438597e-05, |
|
"loss": 0.3252, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.000421052631578, |
|
"grad_norm": 8.495650291442871, |
|
"learning_rate": 1.1403508771929824e-05, |
|
"loss": 0.2205, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.00077192982456, |
|
"grad_norm": 0.5458263754844666, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 0.4623, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.001122807017543, |
|
"grad_norm": 35.78744888305664, |
|
"learning_rate": 1.1754385964912282e-05, |
|
"loss": 0.4652, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.001473684210527, |
|
"grad_norm": 69.58731842041016, |
|
"learning_rate": 1.192982456140351e-05, |
|
"loss": 0.2175, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 11.00182456140351, |
|
"grad_norm": 80.09464263916016, |
|
"learning_rate": 1.2105263157894737e-05, |
|
"loss": 0.3255, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.002035087719298, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.39784160256385803, |
|
"eval_runtime": 82.0895, |
|
"eval_samples_per_second": 0.585, |
|
"eval_steps_per_second": 0.146, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 12.000140350877192, |
|
"grad_norm": 0.08766458928585052, |
|
"learning_rate": 1.2280701754385964e-05, |
|
"loss": 0.0288, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.000491228070176, |
|
"grad_norm": 10.239900588989258, |
|
"learning_rate": 1.2456140350877193e-05, |
|
"loss": 0.2648, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.000842105263159, |
|
"grad_norm": 5.331236839294434, |
|
"learning_rate": 1.2631578947368422e-05, |
|
"loss": 0.3223, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.001192982456141, |
|
"grad_norm": 0.24060657620429993, |
|
"learning_rate": 1.2807017543859651e-05, |
|
"loss": 0.2808, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.001543859649123, |
|
"grad_norm": 0.31760913133621216, |
|
"learning_rate": 1.2982456140350877e-05, |
|
"loss": 0.2207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.001894736842106, |
|
"grad_norm": 70.13704681396484, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.2524, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.002035087719298, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.3351368010044098, |
|
"eval_runtime": 80.8837, |
|
"eval_samples_per_second": 0.593, |
|
"eval_steps_per_second": 0.148, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 13.00021052631579, |
|
"grad_norm": 0.32135623693466187, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.2225, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.000561403508772, |
|
"grad_norm": 21.094276428222656, |
|
"learning_rate": 1.3508771929824562e-05, |
|
"loss": 0.5212, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.000912280701755, |
|
"grad_norm": 0.08428701013326645, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 0.4246, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.001263157894737, |
|
"grad_norm": 0.18355534970760345, |
|
"learning_rate": 1.3859649122807017e-05, |
|
"loss": 0.0793, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 13.00161403508772, |
|
"grad_norm": 8.33340072631836, |
|
"learning_rate": 1.4035087719298246e-05, |
|
"loss": 0.3384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.001964912280702, |
|
"grad_norm": 0.7141004204750061, |
|
"learning_rate": 1.4210526315789475e-05, |
|
"loss": 0.5978, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 13.002035087719298, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.23082482814788818, |
|
"eval_runtime": 81.747, |
|
"eval_samples_per_second": 0.587, |
|
"eval_steps_per_second": 0.147, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 14.000280701754386, |
|
"grad_norm": 0.15585492551326752, |
|
"learning_rate": 1.4385964912280702e-05, |
|
"loss": 0.122, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.000631578947369, |
|
"grad_norm": 49.04802322387695, |
|
"learning_rate": 1.4561403508771931e-05, |
|
"loss": 0.522, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.00098245614035, |
|
"grad_norm": 0.3657858967781067, |
|
"learning_rate": 1.4736842105263157e-05, |
|
"loss": 0.0476, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.001333333333333, |
|
"grad_norm": 0.05123307183384895, |
|
"learning_rate": 1.4912280701754386e-05, |
|
"loss": 0.2268, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.001684210526316, |
|
"grad_norm": 0.08785073459148407, |
|
"learning_rate": 1.5087719298245615e-05, |
|
"loss": 0.4392, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 14.002035087719298, |
|
"grad_norm": 0.33805736899375916, |
|
"learning_rate": 1.5263157894736842e-05, |
|
"loss": 0.1542, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 14.002035087719298, |
|
"eval_accuracy": 0.8958333333333334, |
|
"eval_loss": 0.5762323141098022, |
|
"eval_runtime": 82.832, |
|
"eval_samples_per_second": 0.579, |
|
"eval_steps_per_second": 0.145, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.000350877192982, |
|
"grad_norm": 0.06892251968383789, |
|
"learning_rate": 1.543859649122807e-05, |
|
"loss": 0.1377, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.000701754385965, |
|
"grad_norm": 0.07005161046981812, |
|
"learning_rate": 1.56140350877193e-05, |
|
"loss": 0.0053, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.001052631578947, |
|
"grad_norm": 0.03198734670877457, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.5775, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.00140350877193, |
|
"grad_norm": 171.48255920410156, |
|
"learning_rate": 1.5964912280701755e-05, |
|
"loss": 0.3737, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 15.001754385964912, |
|
"grad_norm": 0.4068077504634857, |
|
"learning_rate": 1.6140350877192984e-05, |
|
"loss": 0.3073, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 15.002035087719298, |
|
"eval_accuracy": 0.8958333333333334, |
|
"eval_loss": 0.33416375517845154, |
|
"eval_runtime": 83.3591, |
|
"eval_samples_per_second": 0.576, |
|
"eval_steps_per_second": 0.144, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 16.000070175438598, |
|
"grad_norm": 0.3335668444633484, |
|
"learning_rate": 1.6315789473684213e-05, |
|
"loss": 0.7197, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.00042105263158, |
|
"grad_norm": 1.4757983684539795, |
|
"learning_rate": 1.6491228070175442e-05, |
|
"loss": 0.2539, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.000771929824563, |
|
"grad_norm": 0.17356331646442413, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0063, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.001122807017545, |
|
"grad_norm": 0.1452503204345703, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 0.5967, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 16.001473684210527, |
|
"grad_norm": 0.1030503362417221, |
|
"learning_rate": 1.7017543859649125e-05, |
|
"loss": 0.6578, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 16.00182456140351, |
|
"grad_norm": 12.400784492492676, |
|
"learning_rate": 1.719298245614035e-05, |
|
"loss": 0.5518, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 16.0020350877193, |
|
"eval_accuracy": 0.8541666666666666, |
|
"eval_loss": 0.4223368465900421, |
|
"eval_runtime": 83.4362, |
|
"eval_samples_per_second": 0.575, |
|
"eval_steps_per_second": 0.144, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 17.000140350877192, |
|
"grad_norm": 0.28909754753112793, |
|
"learning_rate": 1.736842105263158e-05, |
|
"loss": 0.2008, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.000491228070175, |
|
"grad_norm": 0.21579360961914062, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.3298, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.000842105263157, |
|
"grad_norm": 0.10615105926990509, |
|
"learning_rate": 1.7719298245614035e-05, |
|
"loss": 0.004, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.00119298245614, |
|
"grad_norm": 0.046201951801776886, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 0.3526, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 17.00154385964912, |
|
"grad_norm": 0.06010481342673302, |
|
"learning_rate": 1.8070175438596493e-05, |
|
"loss": 0.3399, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 17.001894736842104, |
|
"grad_norm": 8.584966659545898, |
|
"learning_rate": 1.8245614035087722e-05, |
|
"loss": 0.6157, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 17.0020350877193, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.17038817703723907, |
|
"eval_runtime": 83.7401, |
|
"eval_samples_per_second": 0.573, |
|
"eval_steps_per_second": 0.143, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 18.00021052631579, |
|
"grad_norm": 0.774956464767456, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 0.1596, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.000561403508772, |
|
"grad_norm": 0.36749064922332764, |
|
"learning_rate": 1.8596491228070176e-05, |
|
"loss": 0.2122, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.000912280701755, |
|
"grad_norm": 0.06645552814006805, |
|
"learning_rate": 1.8771929824561405e-05, |
|
"loss": 0.2568, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 18.001263157894737, |
|
"grad_norm": 0.021599041298031807, |
|
"learning_rate": 1.8947368421052634e-05, |
|
"loss": 0.283, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 18.00161403508772, |
|
"grad_norm": 113.25637817382812, |
|
"learning_rate": 1.9122807017543863e-05, |
|
"loss": 0.3591, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 18.0019649122807, |
|
"grad_norm": 0.21973834931850433, |
|
"learning_rate": 1.929824561403509e-05, |
|
"loss": 0.2544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 18.0020350877193, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.35440635681152344, |
|
"eval_runtime": 82.2034, |
|
"eval_samples_per_second": 0.584, |
|
"eval_steps_per_second": 0.146, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 19.000280701754384, |
|
"grad_norm": 0.06097158417105675, |
|
"learning_rate": 1.9473684210526315e-05, |
|
"loss": 0.3663, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.000631578947367, |
|
"grad_norm": 25.72997283935547, |
|
"learning_rate": 1.9649122807017544e-05, |
|
"loss": 0.8104, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.000982456140353, |
|
"grad_norm": 0.5115303993225098, |
|
"learning_rate": 1.9824561403508773e-05, |
|
"loss": 0.2474, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 19.001333333333335, |
|
"grad_norm": 0.27492067217826843, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3686, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 19.001684210526317, |
|
"grad_norm": 22.944690704345703, |
|
"learning_rate": 2.0175438596491227e-05, |
|
"loss": 0.2315, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 19.0020350877193, |
|
"grad_norm": 0.11991500854492188, |
|
"learning_rate": 2.0350877192982456e-05, |
|
"loss": 0.4036, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 19.0020350877193, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.25051262974739075, |
|
"eval_runtime": 80.7899, |
|
"eval_samples_per_second": 0.594, |
|
"eval_steps_per_second": 0.149, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.000350877192982, |
|
"grad_norm": 0.44547587633132935, |
|
"learning_rate": 2.0526315789473685e-05, |
|
"loss": 0.1078, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.000701754385965, |
|
"grad_norm": 76.07775115966797, |
|
"learning_rate": 2.0701754385964914e-05, |
|
"loss": 0.4915, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 20.001052631578947, |
|
"grad_norm": 0.349282830953598, |
|
"learning_rate": 2.0877192982456143e-05, |
|
"loss": 0.2929, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 20.00140350877193, |
|
"grad_norm": 8.304322242736816, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 0.219, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.00175438596491, |
|
"grad_norm": 0.08941491693258286, |
|
"learning_rate": 2.1228070175438598e-05, |
|
"loss": 0.2382, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 20.0020350877193, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.3155660927295685, |
|
"eval_runtime": 82.6296, |
|
"eval_samples_per_second": 0.581, |
|
"eval_steps_per_second": 0.145, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 21.000070175438598, |
|
"grad_norm": 6.294134140014648, |
|
"learning_rate": 2.1403508771929827e-05, |
|
"loss": 0.2611, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.00042105263158, |
|
"grad_norm": 0.11261521279811859, |
|
"learning_rate": 2.1578947368421053e-05, |
|
"loss": 0.1969, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.000771929824563, |
|
"grad_norm": 0.2796896696090698, |
|
"learning_rate": 2.175438596491228e-05, |
|
"loss": 0.2955, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.001122807017545, |
|
"grad_norm": 0.07930008322000504, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.013, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 21.001473684210527, |
|
"grad_norm": 5.909428119659424, |
|
"learning_rate": 2.2105263157894736e-05, |
|
"loss": 0.3568, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 21.00182456140351, |
|
"grad_norm": 168.33380126953125, |
|
"learning_rate": 2.2280701754385965e-05, |
|
"loss": 0.6751, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 21.0020350877193, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.259630411863327, |
|
"eval_runtime": 82.1271, |
|
"eval_samples_per_second": 0.584, |
|
"eval_steps_per_second": 0.146, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 22.000140350877192, |
|
"grad_norm": 0.22503353655338287, |
|
"learning_rate": 2.2456140350877194e-05, |
|
"loss": 0.3249, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.000491228070175, |
|
"grad_norm": 0.2562604248523712, |
|
"learning_rate": 2.2631578947368423e-05, |
|
"loss": 0.2267, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 22.000842105263157, |
|
"grad_norm": 0.6118970513343811, |
|
"learning_rate": 2.280701754385965e-05, |
|
"loss": 0.7495, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.00119298245614, |
|
"grad_norm": 0.2397994101047516, |
|
"learning_rate": 2.2982456140350878e-05, |
|
"loss": 0.0388, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 22.00154385964912, |
|
"grad_norm": 0.10384727269411087, |
|
"learning_rate": 2.3157894736842107e-05, |
|
"loss": 0.3285, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 22.001894736842104, |
|
"grad_norm": 0.0419117733836174, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.2848, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 22.0020350877193, |
|
"eval_accuracy": 0.8125, |
|
"eval_loss": 0.822706937789917, |
|
"eval_runtime": 83.5818, |
|
"eval_samples_per_second": 0.574, |
|
"eval_steps_per_second": 0.144, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 23.00021052631579, |
|
"grad_norm": 121.5499038696289, |
|
"learning_rate": 2.3508771929824565e-05, |
|
"loss": 0.5364, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 23.000561403508772, |
|
"grad_norm": 0.10266309231519699, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 0.8097, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 23.000912280701755, |
|
"grad_norm": 9.736127853393555, |
|
"learning_rate": 2.385964912280702e-05, |
|
"loss": 0.6052, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 23.001263157894737, |
|
"grad_norm": 4.3637471199035645, |
|
"learning_rate": 2.4035087719298245e-05, |
|
"loss": 0.3504, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 23.00161403508772, |
|
"grad_norm": 0.19882246851921082, |
|
"learning_rate": 2.4210526315789474e-05, |
|
"loss": 0.3784, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 23.0019649122807, |
|
"grad_norm": 0.27082210779190063, |
|
"learning_rate": 2.4385964912280703e-05, |
|
"loss": 0.1225, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 23.0020350877193, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.2921377420425415, |
|
"eval_runtime": 82.9556, |
|
"eval_samples_per_second": 0.579, |
|
"eval_steps_per_second": 0.145, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 24.000280701754384, |
|
"grad_norm": 0.2171986997127533, |
|
"learning_rate": 2.456140350877193e-05, |
|
"loss": 0.1094, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.000631578947367, |
|
"grad_norm": 0.21692253649234772, |
|
"learning_rate": 2.4736842105263158e-05, |
|
"loss": 0.3332, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 24.000982456140353, |
|
"grad_norm": 0.3834693729877472, |
|
"learning_rate": 2.4912280701754387e-05, |
|
"loss": 0.2847, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 24.001333333333335, |
|
"grad_norm": 0.08816500753164291, |
|
"learning_rate": 2.5087719298245616e-05, |
|
"loss": 0.1147, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 24.001684210526317, |
|
"grad_norm": 0.21103212237358093, |
|
"learning_rate": 2.5263157894736845e-05, |
|
"loss": 0.4283, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 24.0020350877193, |
|
"grad_norm": 0.27631059288978577, |
|
"learning_rate": 2.5438596491228074e-05, |
|
"loss": 0.616, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 24.0020350877193, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.2928893566131592, |
|
"eval_runtime": 81.6066, |
|
"eval_samples_per_second": 0.588, |
|
"eval_steps_per_second": 0.147, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 24.0020350877193, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.2618250548839569, |
|
"eval_runtime": 88.9045, |
|
"eval_samples_per_second": 0.574, |
|
"eval_steps_per_second": 0.146, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 24.0020350877193, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.2618250548839569, |
|
"eval_runtime": 89.4759, |
|
"eval_samples_per_second": 0.57, |
|
"eval_steps_per_second": 0.145, |
|
"step": 1450 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 28500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.164871389462528e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|