|
{ |
|
"best_metric": 0.9906542056074766, |
|
"best_model_checkpoint": "finetuned-for-YogaPosesv4/checkpoint-800", |
|
"epoch": 12.0, |
|
"eval_steps": 100, |
|
"global_step": 1368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08771929824561403, |
|
"grad_norm": 7.6294169425964355, |
|
"learning_rate": 0.00019853801169590645, |
|
"loss": 0.1835, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 2.819319009780884, |
|
"learning_rate": 0.00019707602339181287, |
|
"loss": 0.1356, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2631578947368421, |
|
"grad_norm": 7.1533966064453125, |
|
"learning_rate": 0.0001956140350877193, |
|
"loss": 0.0432, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 6.63047456741333, |
|
"learning_rate": 0.00019415204678362573, |
|
"loss": 0.1341, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.43859649122807015, |
|
"grad_norm": 7.353053092956543, |
|
"learning_rate": 0.00019269005847953217, |
|
"loss": 0.0872, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.5175597071647644, |
|
"learning_rate": 0.0001912280701754386, |
|
"loss": 0.0915, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6140350877192983, |
|
"grad_norm": 2.7669591903686523, |
|
"learning_rate": 0.00018976608187134503, |
|
"loss": 0.0663, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 15.684842109680176, |
|
"learning_rate": 0.00018830409356725147, |
|
"loss": 0.0644, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7894736842105263, |
|
"grad_norm": 12.486333847045898, |
|
"learning_rate": 0.00018684210526315792, |
|
"loss": 0.1148, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 2.681043863296509, |
|
"learning_rate": 0.00018538011695906433, |
|
"loss": 0.0434, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"eval_accuracy": 0.9595015576323987, |
|
"eval_loss": 0.1421448141336441, |
|
"eval_runtime": 35.5517, |
|
"eval_samples_per_second": 9.029, |
|
"eval_steps_per_second": 1.153, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9649122807017544, |
|
"grad_norm": 0.4669760465621948, |
|
"learning_rate": 0.00018391812865497077, |
|
"loss": 0.1035, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 1.2481253147125244, |
|
"learning_rate": 0.0001824561403508772, |
|
"loss": 0.0242, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1403508771929824, |
|
"grad_norm": 0.747638463973999, |
|
"learning_rate": 0.00018099415204678363, |
|
"loss": 0.0911, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 4.2482428550720215, |
|
"learning_rate": 0.00017953216374269005, |
|
"loss": 0.0243, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3157894736842106, |
|
"grad_norm": 14.569940567016602, |
|
"learning_rate": 0.0001780701754385965, |
|
"loss": 0.0906, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 6.877868175506592, |
|
"learning_rate": 0.00017660818713450294, |
|
"loss": 0.0318, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.4912280701754386, |
|
"grad_norm": 0.7561317086219788, |
|
"learning_rate": 0.00017514619883040938, |
|
"loss": 0.0707, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 3.7642083168029785, |
|
"learning_rate": 0.0001736842105263158, |
|
"loss": 0.0514, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.7942452430725098, |
|
"learning_rate": 0.00017222222222222224, |
|
"loss": 0.0531, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.05782110244035721, |
|
"learning_rate": 0.00017076023391812865, |
|
"loss": 0.033, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"eval_accuracy": 0.9875389408099688, |
|
"eval_loss": 0.0879807248711586, |
|
"eval_runtime": 40.2074, |
|
"eval_samples_per_second": 7.984, |
|
"eval_steps_per_second": 1.02, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.8421052631578947, |
|
"grad_norm": 13.322481155395508, |
|
"learning_rate": 0.0001692982456140351, |
|
"loss": 0.1105, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.9878503084182739, |
|
"learning_rate": 0.0001678362573099415, |
|
"loss": 0.0581, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.017543859649123, |
|
"grad_norm": 3.3777120113372803, |
|
"learning_rate": 0.00016637426900584796, |
|
"loss": 0.0391, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.03219028189778328, |
|
"learning_rate": 0.0001649122807017544, |
|
"loss": 0.0413, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.192982456140351, |
|
"grad_norm": 11.16067123413086, |
|
"learning_rate": 0.00016345029239766084, |
|
"loss": 0.0693, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 17.428421020507812, |
|
"learning_rate": 0.00016198830409356726, |
|
"loss": 0.1285, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.3684210526315788, |
|
"grad_norm": 3.1800684928894043, |
|
"learning_rate": 0.0001605263157894737, |
|
"loss": 0.0185, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 0.18285800516605377, |
|
"learning_rate": 0.00015906432748538012, |
|
"loss": 0.0343, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.543859649122807, |
|
"grad_norm": 4.973541259765625, |
|
"learning_rate": 0.00015760233918128656, |
|
"loss": 0.053, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.5851675271987915, |
|
"learning_rate": 0.00015614035087719297, |
|
"loss": 0.084, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"eval_accuracy": 0.9844236760124611, |
|
"eval_loss": 0.09186869114637375, |
|
"eval_runtime": 39.6899, |
|
"eval_samples_per_second": 8.088, |
|
"eval_steps_per_second": 1.033, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.719298245614035, |
|
"grad_norm": 0.42921513319015503, |
|
"learning_rate": 0.00015467836257309942, |
|
"loss": 0.0589, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 14.734365463256836, |
|
"learning_rate": 0.00015321637426900586, |
|
"loss": 0.0975, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.8947368421052633, |
|
"grad_norm": 3.1166839599609375, |
|
"learning_rate": 0.0001517543859649123, |
|
"loss": 0.0401, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 3.258279800415039, |
|
"learning_rate": 0.00015029239766081872, |
|
"loss": 0.0549, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.0701754385964914, |
|
"grad_norm": 0.8289499878883362, |
|
"learning_rate": 0.00014883040935672514, |
|
"loss": 0.0592, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.16162537038326263, |
|
"learning_rate": 0.00014736842105263158, |
|
"loss": 0.1327, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.245614035087719, |
|
"grad_norm": 3.4843053817749023, |
|
"learning_rate": 0.00014590643274853802, |
|
"loss": 0.0478, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.20168577134609222, |
|
"learning_rate": 0.00014444444444444444, |
|
"loss": 0.0901, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.4210526315789473, |
|
"grad_norm": 3.0230636596679688, |
|
"learning_rate": 0.00014298245614035088, |
|
"loss": 0.0749, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 4.313819408416748, |
|
"learning_rate": 0.00014152046783625732, |
|
"loss": 0.0655, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"eval_accuracy": 0.9875389408099688, |
|
"eval_loss": 0.09323806315660477, |
|
"eval_runtime": 37.6349, |
|
"eval_samples_per_second": 8.529, |
|
"eval_steps_per_second": 1.089, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.5964912280701755, |
|
"grad_norm": 0.0746302455663681, |
|
"learning_rate": 0.00014005847953216377, |
|
"loss": 0.0508, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 12.243117332458496, |
|
"learning_rate": 0.00013859649122807018, |
|
"loss": 0.0327, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.7719298245614032, |
|
"grad_norm": 0.5793654918670654, |
|
"learning_rate": 0.0001371345029239766, |
|
"loss": 0.05, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 0.30418357253074646, |
|
"learning_rate": 0.00013567251461988304, |
|
"loss": 0.0135, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.9473684210526314, |
|
"grad_norm": 0.7474086284637451, |
|
"learning_rate": 0.00013421052631578948, |
|
"loss": 0.0464, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.035087719298246, |
|
"grad_norm": 3.3401076793670654, |
|
"learning_rate": 0.0001327485380116959, |
|
"loss": 0.181, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.12280701754386, |
|
"grad_norm": 3.931779623031616, |
|
"learning_rate": 0.00013128654970760234, |
|
"loss": 0.0092, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 0.41829147934913635, |
|
"learning_rate": 0.0001298245614035088, |
|
"loss": 0.0177, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.298245614035087, |
|
"grad_norm": 0.22863353788852692, |
|
"learning_rate": 0.00012836257309941523, |
|
"loss": 0.0183, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 0.01852666214108467, |
|
"learning_rate": 0.00012690058479532165, |
|
"loss": 0.0126, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"eval_accuracy": 0.9875389408099688, |
|
"eval_loss": 0.06964406371116638, |
|
"eval_runtime": 36.8852, |
|
"eval_samples_per_second": 8.703, |
|
"eval_steps_per_second": 1.112, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.473684210526316, |
|
"grad_norm": 1.5573415756225586, |
|
"learning_rate": 0.00012543859649122806, |
|
"loss": 0.0668, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 0.009869000874459743, |
|
"learning_rate": 0.0001239766081871345, |
|
"loss": 0.0137, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.649122807017544, |
|
"grad_norm": 0.20358864963054657, |
|
"learning_rate": 0.00012251461988304095, |
|
"loss": 0.0164, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 0.5513414144515991, |
|
"learning_rate": 0.00012105263157894738, |
|
"loss": 0.028, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.824561403508772, |
|
"grad_norm": 3.767604351043701, |
|
"learning_rate": 0.0001195906432748538, |
|
"loss": 0.016, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 0.5676788091659546, |
|
"learning_rate": 0.00011812865497076025, |
|
"loss": 0.037, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 94.76567077636719, |
|
"learning_rate": 0.00011666666666666668, |
|
"loss": 0.2472, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.087719298245614, |
|
"grad_norm": 0.013286423869431019, |
|
"learning_rate": 0.00011520467836257311, |
|
"loss": 0.0449, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.175438596491228, |
|
"grad_norm": 2.3555777072906494, |
|
"learning_rate": 0.00011374269005847952, |
|
"loss": 0.1002, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 21.486955642700195, |
|
"learning_rate": 0.00011228070175438597, |
|
"loss": 0.0487, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"eval_accuracy": 0.9719626168224299, |
|
"eval_loss": 0.08467654883861542, |
|
"eval_runtime": 35.5793, |
|
"eval_samples_per_second": 9.022, |
|
"eval_steps_per_second": 1.152, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.350877192982456, |
|
"grad_norm": 3.456000328063965, |
|
"learning_rate": 0.0001108187134502924, |
|
"loss": 0.0451, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.43859649122807, |
|
"grad_norm": 0.21063438057899475, |
|
"learning_rate": 0.00010935672514619884, |
|
"loss": 0.0016, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.526315789473684, |
|
"grad_norm": 0.007295363582670689, |
|
"learning_rate": 0.00010789473684210527, |
|
"loss": 0.0298, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 1.68939208984375, |
|
"learning_rate": 0.00010643274853801171, |
|
"loss": 0.0175, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.701754385964913, |
|
"grad_norm": 0.7690061330795288, |
|
"learning_rate": 0.00010497076023391814, |
|
"loss": 0.0565, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.7894736842105265, |
|
"grad_norm": 0.004914429504424334, |
|
"learning_rate": 0.00010350877192982457, |
|
"loss": 0.0151, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.87719298245614, |
|
"grad_norm": 2.762733221054077, |
|
"learning_rate": 0.00010204678362573099, |
|
"loss": 0.0428, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 1.9332022666931152, |
|
"learning_rate": 0.00010058479532163743, |
|
"loss": 0.0335, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.052631578947368, |
|
"grad_norm": 0.0930035263299942, |
|
"learning_rate": 9.912280701754386e-05, |
|
"loss": 0.0342, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 0.03953389823436737, |
|
"learning_rate": 9.76608187134503e-05, |
|
"loss": 0.0114, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"eval_accuracy": 0.9813084112149533, |
|
"eval_loss": 0.11030136793851852, |
|
"eval_runtime": 35.4585, |
|
"eval_samples_per_second": 9.053, |
|
"eval_steps_per_second": 1.156, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.228070175438597, |
|
"grad_norm": 0.06557751446962357, |
|
"learning_rate": 9.619883040935673e-05, |
|
"loss": 0.0089, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 11.056671142578125, |
|
"learning_rate": 9.473684210526316e-05, |
|
"loss": 0.0294, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.4035087719298245, |
|
"grad_norm": 0.26337969303131104, |
|
"learning_rate": 9.327485380116959e-05, |
|
"loss": 0.0213, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.491228070175438, |
|
"grad_norm": 0.15492522716522217, |
|
"learning_rate": 9.181286549707603e-05, |
|
"loss": 0.0469, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.578947368421053, |
|
"grad_norm": 0.013370133936405182, |
|
"learning_rate": 9.035087719298246e-05, |
|
"loss": 0.0004, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.10022695362567902, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0255, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.754385964912281, |
|
"grad_norm": 0.0027779792435467243, |
|
"learning_rate": 8.742690058479532e-05, |
|
"loss": 0.0358, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.842105263157895, |
|
"grad_norm": 2.183762788772583, |
|
"learning_rate": 8.596491228070177e-05, |
|
"loss": 0.0171, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.9298245614035086, |
|
"grad_norm": 2.652369737625122, |
|
"learning_rate": 8.45029239766082e-05, |
|
"loss": 0.0813, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 0.02325315773487091, |
|
"learning_rate": 8.304093567251462e-05, |
|
"loss": 0.0377, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"eval_accuracy": 0.9906542056074766, |
|
"eval_loss": 0.07433710992336273, |
|
"eval_runtime": 36.845, |
|
"eval_samples_per_second": 8.712, |
|
"eval_steps_per_second": 1.113, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.105263157894737, |
|
"grad_norm": 0.002300787018612027, |
|
"learning_rate": 8.157894736842105e-05, |
|
"loss": 0.0111, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.192982456140351, |
|
"grad_norm": 0.8302111625671387, |
|
"learning_rate": 8.01169590643275e-05, |
|
"loss": 0.0242, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.280701754385965, |
|
"grad_norm": 0.0038752101827412844, |
|
"learning_rate": 7.865497076023393e-05, |
|
"loss": 0.0003, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 0.19968080520629883, |
|
"learning_rate": 7.719298245614036e-05, |
|
"loss": 0.0124, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.456140350877193, |
|
"grad_norm": 0.9523430466651917, |
|
"learning_rate": 7.573099415204679e-05, |
|
"loss": 0.0279, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.543859649122807, |
|
"grad_norm": 1.5014656782150269, |
|
"learning_rate": 7.426900584795321e-05, |
|
"loss": 0.0167, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.631578947368421, |
|
"grad_norm": 10.451250076293945, |
|
"learning_rate": 7.280701754385966e-05, |
|
"loss": 0.0142, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 2.1814494132995605, |
|
"learning_rate": 7.134502923976609e-05, |
|
"loss": 0.0469, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.807017543859649, |
|
"grad_norm": 2.4280495643615723, |
|
"learning_rate": 6.988304093567252e-05, |
|
"loss": 0.0096, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 0.14394675195217133, |
|
"learning_rate": 6.842105263157895e-05, |
|
"loss": 0.062, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"eval_accuracy": 0.9781931464174455, |
|
"eval_loss": 0.16418205201625824, |
|
"eval_runtime": 37.5674, |
|
"eval_samples_per_second": 8.545, |
|
"eval_steps_per_second": 1.091, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.982456140350877, |
|
"grad_norm": 0.20802760124206543, |
|
"learning_rate": 6.695906432748539e-05, |
|
"loss": 0.0651, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 0.10575652867555618, |
|
"learning_rate": 6.549707602339182e-05, |
|
"loss": 0.0115, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.157894736842104, |
|
"grad_norm": 0.8286623954772949, |
|
"learning_rate": 6.403508771929825e-05, |
|
"loss": 0.0087, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.24561403508772, |
|
"grad_norm": 0.39614301919937134, |
|
"learning_rate": 6.257309941520468e-05, |
|
"loss": 0.0198, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 0.0022884588688611984, |
|
"learning_rate": 6.111111111111112e-05, |
|
"loss": 0.0006, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 0.06943880021572113, |
|
"learning_rate": 5.9649122807017544e-05, |
|
"loss": 0.0138, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.508771929824562, |
|
"grad_norm": 0.62754225730896, |
|
"learning_rate": 5.818713450292398e-05, |
|
"loss": 0.004, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.596491228070175, |
|
"grad_norm": 0.0023998187389224768, |
|
"learning_rate": 5.6725146198830416e-05, |
|
"loss": 0.0039, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 8.68421052631579, |
|
"grad_norm": 0.12702779471874237, |
|
"learning_rate": 5.526315789473685e-05, |
|
"loss": 0.0277, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 4.152740955352783, |
|
"learning_rate": 5.3801169590643275e-05, |
|
"loss": 0.0025, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"eval_accuracy": 0.9875389408099688, |
|
"eval_loss": 0.05983072146773338, |
|
"eval_runtime": 37.137, |
|
"eval_samples_per_second": 8.644, |
|
"eval_steps_per_second": 1.104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.859649122807017, |
|
"grad_norm": 0.05080701410770416, |
|
"learning_rate": 5.233918128654971e-05, |
|
"loss": 0.1165, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 8.947368421052632, |
|
"grad_norm": 0.007482455112040043, |
|
"learning_rate": 5.087719298245615e-05, |
|
"loss": 0.0076, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.035087719298245, |
|
"grad_norm": 4.2491483327466995e-05, |
|
"learning_rate": 4.941520467836258e-05, |
|
"loss": 0.0035, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 0.017159271985292435, |
|
"learning_rate": 4.7953216374269006e-05, |
|
"loss": 0.0157, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.210526315789474, |
|
"grad_norm": 0.1872653365135193, |
|
"learning_rate": 4.649122807017544e-05, |
|
"loss": 0.0113, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.298245614035087, |
|
"grad_norm": 0.000704402569681406, |
|
"learning_rate": 4.502923976608187e-05, |
|
"loss": 0.0003, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.385964912280702, |
|
"grad_norm": 0.938255250453949, |
|
"learning_rate": 4.356725146198831e-05, |
|
"loss": 0.015, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 0.03299758583307266, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 0.0333, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.56140350877193, |
|
"grad_norm": 3.074659824371338, |
|
"learning_rate": 4.0643274853801174e-05, |
|
"loss": 0.0159, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 0.00028988588019274175, |
|
"learning_rate": 3.9181286549707604e-05, |
|
"loss": 0.0041, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"eval_accuracy": 0.9813084112149533, |
|
"eval_loss": 0.12797725200653076, |
|
"eval_runtime": 37.1154, |
|
"eval_samples_per_second": 8.649, |
|
"eval_steps_per_second": 1.105, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.736842105263158, |
|
"grad_norm": 0.0019729495979845524, |
|
"learning_rate": 3.771929824561404e-05, |
|
"loss": 0.0362, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 0.0025722056161612272, |
|
"learning_rate": 3.625730994152047e-05, |
|
"loss": 0.0009, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 9.912280701754385, |
|
"grad_norm": 1.9475336074829102, |
|
"learning_rate": 3.4795321637426905e-05, |
|
"loss": 0.0368, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.05613500997424126, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.032, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 10.087719298245615, |
|
"grad_norm": 0.031211694702506065, |
|
"learning_rate": 3.187134502923977e-05, |
|
"loss": 0.0146, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.175438596491228, |
|
"grad_norm": 1.126404881477356, |
|
"learning_rate": 3.0409356725146197e-05, |
|
"loss": 0.0035, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 10.263157894736842, |
|
"grad_norm": 0.0027087063062936068, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 0.0031, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 10.350877192982455, |
|
"grad_norm": 0.012115487828850746, |
|
"learning_rate": 2.7485380116959063e-05, |
|
"loss": 0.0152, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 10.43859649122807, |
|
"grad_norm": 0.0014923892449587584, |
|
"learning_rate": 2.60233918128655e-05, |
|
"loss": 0.0298, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 10.097140312194824, |
|
"learning_rate": 2.456140350877193e-05, |
|
"loss": 0.0305, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"eval_accuracy": 0.9813084112149533, |
|
"eval_loss": 0.0920475497841835, |
|
"eval_runtime": 38.0741, |
|
"eval_samples_per_second": 8.431, |
|
"eval_steps_per_second": 1.077, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.614035087719298, |
|
"grad_norm": 0.04899158701300621, |
|
"learning_rate": 2.309941520467836e-05, |
|
"loss": 0.0228, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 10.701754385964913, |
|
"grad_norm": 1.95464289188385, |
|
"learning_rate": 2.1637426900584794e-05, |
|
"loss": 0.0053, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.789473684210526, |
|
"grad_norm": 0.011684381403028965, |
|
"learning_rate": 2.0175438596491227e-05, |
|
"loss": 0.0057, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 10.87719298245614, |
|
"grad_norm": 0.004223628900945187, |
|
"learning_rate": 1.871345029239766e-05, |
|
"loss": 0.0025, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 10.964912280701755, |
|
"grad_norm": 0.018841328099370003, |
|
"learning_rate": 1.7251461988304093e-05, |
|
"loss": 0.0029, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 11.052631578947368, |
|
"grad_norm": 0.0007397038862109184, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.1863, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 11.140350877192983, |
|
"grad_norm": 0.12742939591407776, |
|
"learning_rate": 1.4327485380116959e-05, |
|
"loss": 0.0001, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 11.228070175438596, |
|
"grad_norm": 0.05407753214240074, |
|
"learning_rate": 1.2865497076023392e-05, |
|
"loss": 0.0001, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 11.31578947368421, |
|
"grad_norm": 1.0473092794418335, |
|
"learning_rate": 1.1403508771929824e-05, |
|
"loss": 0.0458, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 11.403508771929825, |
|
"grad_norm": 1.6566870212554932, |
|
"learning_rate": 9.941520467836257e-06, |
|
"loss": 0.0148, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.403508771929825, |
|
"eval_accuracy": 0.9875389408099688, |
|
"eval_loss": 0.12092220783233643, |
|
"eval_runtime": 36.0207, |
|
"eval_samples_per_second": 8.912, |
|
"eval_steps_per_second": 1.138, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.491228070175438, |
|
"grad_norm": 0.0020481087267398834, |
|
"learning_rate": 8.47953216374269e-06, |
|
"loss": 0.0019, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 11.578947368421053, |
|
"grad_norm": 0.064646415412426, |
|
"learning_rate": 7.017543859649123e-06, |
|
"loss": 0.0064, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 0.0019295840756967664, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0377, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 11.75438596491228, |
|
"grad_norm": 0.25071534514427185, |
|
"learning_rate": 4.093567251461989e-06, |
|
"loss": 0.0035, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 11.842105263157894, |
|
"grad_norm": 0.48753437399864197, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 0.0237, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 11.929824561403509, |
|
"grad_norm": 0.01427147351205349, |
|
"learning_rate": 1.1695906432748538e-06, |
|
"loss": 0.0029, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 1368, |
|
"total_flos": 6.93495297202176e+16, |
|
"train_loss": 0.044889834785714136, |
|
"train_runtime": 3145.9622, |
|
"train_samples_per_second": 6.916, |
|
"train_steps_per_second": 0.435 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1368, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.93495297202176e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|