finetuned-for-YogaPosesv4 / trainer_state.json
{
"best_metric": 0.9906542056074766,
"best_model_checkpoint": "finetuned-for-YogaPosesv4/checkpoint-800",
"epoch": 12.0,
"eval_steps": 100,
"global_step": 1368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08771929824561403,
"grad_norm": 7.6294169425964355,
"learning_rate": 0.00019853801169590645,
"loss": 0.1835,
"step": 10
},
{
"epoch": 0.17543859649122806,
"grad_norm": 2.819319009780884,
"learning_rate": 0.00019707602339181287,
"loss": 0.1356,
"step": 20
},
{
"epoch": 0.2631578947368421,
"grad_norm": 7.1533966064453125,
"learning_rate": 0.0001956140350877193,
"loss": 0.0432,
"step": 30
},
{
"epoch": 0.3508771929824561,
"grad_norm": 6.63047456741333,
"learning_rate": 0.00019415204678362573,
"loss": 0.1341,
"step": 40
},
{
"epoch": 0.43859649122807015,
"grad_norm": 7.353053092956543,
"learning_rate": 0.00019269005847953217,
"loss": 0.0872,
"step": 50
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.5175597071647644,
"learning_rate": 0.0001912280701754386,
"loss": 0.0915,
"step": 60
},
{
"epoch": 0.6140350877192983,
"grad_norm": 2.7669591903686523,
"learning_rate": 0.00018976608187134503,
"loss": 0.0663,
"step": 70
},
{
"epoch": 0.7017543859649122,
"grad_norm": 15.684842109680176,
"learning_rate": 0.00018830409356725147,
"loss": 0.0644,
"step": 80
},
{
"epoch": 0.7894736842105263,
"grad_norm": 12.486333847045898,
"learning_rate": 0.00018684210526315792,
"loss": 0.1148,
"step": 90
},
{
"epoch": 0.8771929824561403,
"grad_norm": 2.681043863296509,
"learning_rate": 0.00018538011695906433,
"loss": 0.0434,
"step": 100
},
{
"epoch": 0.8771929824561403,
"eval_accuracy": 0.9595015576323987,
"eval_loss": 0.1421448141336441,
"eval_runtime": 35.5517,
"eval_samples_per_second": 9.029,
"eval_steps_per_second": 1.153,
"step": 100
},
{
"epoch": 0.9649122807017544,
"grad_norm": 0.4669760465621948,
"learning_rate": 0.00018391812865497077,
"loss": 0.1035,
"step": 110
},
{
"epoch": 1.0526315789473684,
"grad_norm": 1.2481253147125244,
"learning_rate": 0.0001824561403508772,
"loss": 0.0242,
"step": 120
},
{
"epoch": 1.1403508771929824,
"grad_norm": 0.747638463973999,
"learning_rate": 0.00018099415204678363,
"loss": 0.0911,
"step": 130
},
{
"epoch": 1.2280701754385965,
"grad_norm": 4.2482428550720215,
"learning_rate": 0.00017953216374269005,
"loss": 0.0243,
"step": 140
},
{
"epoch": 1.3157894736842106,
"grad_norm": 14.569940567016602,
"learning_rate": 0.0001780701754385965,
"loss": 0.0906,
"step": 150
},
{
"epoch": 1.4035087719298245,
"grad_norm": 6.877868175506592,
"learning_rate": 0.00017660818713450294,
"loss": 0.0318,
"step": 160
},
{
"epoch": 1.4912280701754386,
"grad_norm": 0.7561317086219788,
"learning_rate": 0.00017514619883040938,
"loss": 0.0707,
"step": 170
},
{
"epoch": 1.5789473684210527,
"grad_norm": 3.7642083168029785,
"learning_rate": 0.0001736842105263158,
"loss": 0.0514,
"step": 180
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.7942452430725098,
"learning_rate": 0.00017222222222222224,
"loss": 0.0531,
"step": 190
},
{
"epoch": 1.7543859649122808,
"grad_norm": 0.05782110244035721,
"learning_rate": 0.00017076023391812865,
"loss": 0.033,
"step": 200
},
{
"epoch": 1.7543859649122808,
"eval_accuracy": 0.9875389408099688,
"eval_loss": 0.0879807248711586,
"eval_runtime": 40.2074,
"eval_samples_per_second": 7.984,
"eval_steps_per_second": 1.02,
"step": 200
},
{
"epoch": 1.8421052631578947,
"grad_norm": 13.322481155395508,
"learning_rate": 0.0001692982456140351,
"loss": 0.1105,
"step": 210
},
{
"epoch": 1.9298245614035088,
"grad_norm": 0.9878503084182739,
"learning_rate": 0.0001678362573099415,
"loss": 0.0581,
"step": 220
},
{
"epoch": 2.017543859649123,
"grad_norm": 3.3777120113372803,
"learning_rate": 0.00016637426900584796,
"loss": 0.0391,
"step": 230
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.03219028189778328,
"learning_rate": 0.0001649122807017544,
"loss": 0.0413,
"step": 240
},
{
"epoch": 2.192982456140351,
"grad_norm": 11.16067123413086,
"learning_rate": 0.00016345029239766084,
"loss": 0.0693,
"step": 250
},
{
"epoch": 2.280701754385965,
"grad_norm": 17.428421020507812,
"learning_rate": 0.00016198830409356726,
"loss": 0.1285,
"step": 260
},
{
"epoch": 2.3684210526315788,
"grad_norm": 3.1800684928894043,
"learning_rate": 0.0001605263157894737,
"loss": 0.0185,
"step": 270
},
{
"epoch": 2.456140350877193,
"grad_norm": 0.18285800516605377,
"learning_rate": 0.00015906432748538012,
"loss": 0.0343,
"step": 280
},
{
"epoch": 2.543859649122807,
"grad_norm": 4.973541259765625,
"learning_rate": 0.00015760233918128656,
"loss": 0.053,
"step": 290
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.5851675271987915,
"learning_rate": 0.00015614035087719297,
"loss": 0.084,
"step": 300
},
{
"epoch": 2.6315789473684212,
"eval_accuracy": 0.9844236760124611,
"eval_loss": 0.09186869114637375,
"eval_runtime": 39.6899,
"eval_samples_per_second": 8.088,
"eval_steps_per_second": 1.033,
"step": 300
},
{
"epoch": 2.719298245614035,
"grad_norm": 0.42921513319015503,
"learning_rate": 0.00015467836257309942,
"loss": 0.0589,
"step": 310
},
{
"epoch": 2.807017543859649,
"grad_norm": 14.734365463256836,
"learning_rate": 0.00015321637426900586,
"loss": 0.0975,
"step": 320
},
{
"epoch": 2.8947368421052633,
"grad_norm": 3.1166839599609375,
"learning_rate": 0.0001517543859649123,
"loss": 0.0401,
"step": 330
},
{
"epoch": 2.982456140350877,
"grad_norm": 3.258279800415039,
"learning_rate": 0.00015029239766081872,
"loss": 0.0549,
"step": 340
},
{
"epoch": 3.0701754385964914,
"grad_norm": 0.8289499878883362,
"learning_rate": 0.00014883040935672514,
"loss": 0.0592,
"step": 350
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.16162537038326263,
"learning_rate": 0.00014736842105263158,
"loss": 0.1327,
"step": 360
},
{
"epoch": 3.245614035087719,
"grad_norm": 3.4843053817749023,
"learning_rate": 0.00014590643274853802,
"loss": 0.0478,
"step": 370
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.20168577134609222,
"learning_rate": 0.00014444444444444444,
"loss": 0.0901,
"step": 380
},
{
"epoch": 3.4210526315789473,
"grad_norm": 3.0230636596679688,
"learning_rate": 0.00014298245614035088,
"loss": 0.0749,
"step": 390
},
{
"epoch": 3.5087719298245617,
"grad_norm": 4.313819408416748,
"learning_rate": 0.00014152046783625732,
"loss": 0.0655,
"step": 400
},
{
"epoch": 3.5087719298245617,
"eval_accuracy": 0.9875389408099688,
"eval_loss": 0.09323806315660477,
"eval_runtime": 37.6349,
"eval_samples_per_second": 8.529,
"eval_steps_per_second": 1.089,
"step": 400
},
{
"epoch": 3.5964912280701755,
"grad_norm": 0.0746302455663681,
"learning_rate": 0.00014005847953216377,
"loss": 0.0508,
"step": 410
},
{
"epoch": 3.6842105263157894,
"grad_norm": 12.243117332458496,
"learning_rate": 0.00013859649122807018,
"loss": 0.0327,
"step": 420
},
{
"epoch": 3.7719298245614032,
"grad_norm": 0.5793654918670654,
"learning_rate": 0.0001371345029239766,
"loss": 0.05,
"step": 430
},
{
"epoch": 3.8596491228070176,
"grad_norm": 0.30418357253074646,
"learning_rate": 0.00013567251461988304,
"loss": 0.0135,
"step": 440
},
{
"epoch": 3.9473684210526314,
"grad_norm": 0.7474086284637451,
"learning_rate": 0.00013421052631578948,
"loss": 0.0464,
"step": 450
},
{
"epoch": 4.035087719298246,
"grad_norm": 3.3401076793670654,
"learning_rate": 0.0001327485380116959,
"loss": 0.181,
"step": 460
},
{
"epoch": 4.12280701754386,
"grad_norm": 3.931779623031616,
"learning_rate": 0.00013128654970760234,
"loss": 0.0092,
"step": 470
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.41829147934913635,
"learning_rate": 0.0001298245614035088,
"loss": 0.0177,
"step": 480
},
{
"epoch": 4.298245614035087,
"grad_norm": 0.22863353788852692,
"learning_rate": 0.00012836257309941523,
"loss": 0.0183,
"step": 490
},
{
"epoch": 4.385964912280702,
"grad_norm": 0.01852666214108467,
"learning_rate": 0.00012690058479532165,
"loss": 0.0126,
"step": 500
},
{
"epoch": 4.385964912280702,
"eval_accuracy": 0.9875389408099688,
"eval_loss": 0.06964406371116638,
"eval_runtime": 36.8852,
"eval_samples_per_second": 8.703,
"eval_steps_per_second": 1.112,
"step": 500
},
{
"epoch": 4.473684210526316,
"grad_norm": 1.5573415756225586,
"learning_rate": 0.00012543859649122806,
"loss": 0.0668,
"step": 510
},
{
"epoch": 4.56140350877193,
"grad_norm": 0.009869000874459743,
"learning_rate": 0.0001239766081871345,
"loss": 0.0137,
"step": 520
},
{
"epoch": 4.649122807017544,
"grad_norm": 0.20358864963054657,
"learning_rate": 0.00012251461988304095,
"loss": 0.0164,
"step": 530
},
{
"epoch": 4.7368421052631575,
"grad_norm": 0.5513414144515991,
"learning_rate": 0.00012105263157894738,
"loss": 0.028,
"step": 540
},
{
"epoch": 4.824561403508772,
"grad_norm": 3.767604351043701,
"learning_rate": 0.0001195906432748538,
"loss": 0.016,
"step": 550
},
{
"epoch": 4.912280701754386,
"grad_norm": 0.5676788091659546,
"learning_rate": 0.00011812865497076025,
"loss": 0.037,
"step": 560
},
{
"epoch": 5.0,
"grad_norm": 94.76567077636719,
"learning_rate": 0.00011666666666666668,
"loss": 0.2472,
"step": 570
},
{
"epoch": 5.087719298245614,
"grad_norm": 0.013286423869431019,
"learning_rate": 0.00011520467836257311,
"loss": 0.0449,
"step": 580
},
{
"epoch": 5.175438596491228,
"grad_norm": 2.3555777072906494,
"learning_rate": 0.00011374269005847952,
"loss": 0.1002,
"step": 590
},
{
"epoch": 5.2631578947368425,
"grad_norm": 21.486955642700195,
"learning_rate": 0.00011228070175438597,
"loss": 0.0487,
"step": 600
},
{
"epoch": 5.2631578947368425,
"eval_accuracy": 0.9719626168224299,
"eval_loss": 0.08467654883861542,
"eval_runtime": 35.5793,
"eval_samples_per_second": 9.022,
"eval_steps_per_second": 1.152,
"step": 600
},
{
"epoch": 5.350877192982456,
"grad_norm": 3.456000328063965,
"learning_rate": 0.0001108187134502924,
"loss": 0.0451,
"step": 610
},
{
"epoch": 5.43859649122807,
"grad_norm": 0.21063438057899475,
"learning_rate": 0.00010935672514619884,
"loss": 0.0016,
"step": 620
},
{
"epoch": 5.526315789473684,
"grad_norm": 0.007295363582670689,
"learning_rate": 0.00010789473684210527,
"loss": 0.0298,
"step": 630
},
{
"epoch": 5.614035087719298,
"grad_norm": 1.68939208984375,
"learning_rate": 0.00010643274853801171,
"loss": 0.0175,
"step": 640
},
{
"epoch": 5.701754385964913,
"grad_norm": 0.7690061330795288,
"learning_rate": 0.00010497076023391814,
"loss": 0.0565,
"step": 650
},
{
"epoch": 5.7894736842105265,
"grad_norm": 0.004914429504424334,
"learning_rate": 0.00010350877192982457,
"loss": 0.0151,
"step": 660
},
{
"epoch": 5.87719298245614,
"grad_norm": 2.762733221054077,
"learning_rate": 0.00010204678362573099,
"loss": 0.0428,
"step": 670
},
{
"epoch": 5.964912280701754,
"grad_norm": 1.9332022666931152,
"learning_rate": 0.00010058479532163743,
"loss": 0.0335,
"step": 680
},
{
"epoch": 6.052631578947368,
"grad_norm": 0.0930035263299942,
"learning_rate": 9.912280701754386e-05,
"loss": 0.0342,
"step": 690
},
{
"epoch": 6.140350877192983,
"grad_norm": 0.03953389823436737,
"learning_rate": 9.76608187134503e-05,
"loss": 0.0114,
"step": 700
},
{
"epoch": 6.140350877192983,
"eval_accuracy": 0.9813084112149533,
"eval_loss": 0.11030136793851852,
"eval_runtime": 35.4585,
"eval_samples_per_second": 9.053,
"eval_steps_per_second": 1.156,
"step": 700
},
{
"epoch": 6.228070175438597,
"grad_norm": 0.06557751446962357,
"learning_rate": 9.619883040935673e-05,
"loss": 0.0089,
"step": 710
},
{
"epoch": 6.315789473684211,
"grad_norm": 11.056671142578125,
"learning_rate": 9.473684210526316e-05,
"loss": 0.0294,
"step": 720
},
{
"epoch": 6.4035087719298245,
"grad_norm": 0.26337969303131104,
"learning_rate": 9.327485380116959e-05,
"loss": 0.0213,
"step": 730
},
{
"epoch": 6.491228070175438,
"grad_norm": 0.15492522716522217,
"learning_rate": 9.181286549707603e-05,
"loss": 0.0469,
"step": 740
},
{
"epoch": 6.578947368421053,
"grad_norm": 0.013370133936405182,
"learning_rate": 9.035087719298246e-05,
"loss": 0.0004,
"step": 750
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.10022695362567902,
"learning_rate": 8.888888888888889e-05,
"loss": 0.0255,
"step": 760
},
{
"epoch": 6.754385964912281,
"grad_norm": 0.0027779792435467243,
"learning_rate": 8.742690058479532e-05,
"loss": 0.0358,
"step": 770
},
{
"epoch": 6.842105263157895,
"grad_norm": 2.183762788772583,
"learning_rate": 8.596491228070177e-05,
"loss": 0.0171,
"step": 780
},
{
"epoch": 6.9298245614035086,
"grad_norm": 2.652369737625122,
"learning_rate": 8.45029239766082e-05,
"loss": 0.0813,
"step": 790
},
{
"epoch": 7.017543859649122,
"grad_norm": 0.02325315773487091,
"learning_rate": 8.304093567251462e-05,
"loss": 0.0377,
"step": 800
},
{
"epoch": 7.017543859649122,
"eval_accuracy": 0.9906542056074766,
"eval_loss": 0.07433710992336273,
"eval_runtime": 36.845,
"eval_samples_per_second": 8.712,
"eval_steps_per_second": 1.113,
"step": 800
},
{
"epoch": 7.105263157894737,
"grad_norm": 0.002300787018612027,
"learning_rate": 8.157894736842105e-05,
"loss": 0.0111,
"step": 810
},
{
"epoch": 7.192982456140351,
"grad_norm": 0.8302111625671387,
"learning_rate": 8.01169590643275e-05,
"loss": 0.0242,
"step": 820
},
{
"epoch": 7.280701754385965,
"grad_norm": 0.0038752101827412844,
"learning_rate": 7.865497076023393e-05,
"loss": 0.0003,
"step": 830
},
{
"epoch": 7.368421052631579,
"grad_norm": 0.19968080520629883,
"learning_rate": 7.719298245614036e-05,
"loss": 0.0124,
"step": 840
},
{
"epoch": 7.456140350877193,
"grad_norm": 0.9523430466651917,
"learning_rate": 7.573099415204679e-05,
"loss": 0.0279,
"step": 850
},
{
"epoch": 7.543859649122807,
"grad_norm": 1.5014656782150269,
"learning_rate": 7.426900584795321e-05,
"loss": 0.0167,
"step": 860
},
{
"epoch": 7.631578947368421,
"grad_norm": 10.451250076293945,
"learning_rate": 7.280701754385966e-05,
"loss": 0.0142,
"step": 870
},
{
"epoch": 7.719298245614035,
"grad_norm": 2.1814494132995605,
"learning_rate": 7.134502923976609e-05,
"loss": 0.0469,
"step": 880
},
{
"epoch": 7.807017543859649,
"grad_norm": 2.4280495643615723,
"learning_rate": 6.988304093567252e-05,
"loss": 0.0096,
"step": 890
},
{
"epoch": 7.894736842105263,
"grad_norm": 0.14394675195217133,
"learning_rate": 6.842105263157895e-05,
"loss": 0.062,
"step": 900
},
{
"epoch": 7.894736842105263,
"eval_accuracy": 0.9781931464174455,
"eval_loss": 0.16418205201625824,
"eval_runtime": 37.5674,
"eval_samples_per_second": 8.545,
"eval_steps_per_second": 1.091,
"step": 900
},
{
"epoch": 7.982456140350877,
"grad_norm": 0.20802760124206543,
"learning_rate": 6.695906432748539e-05,
"loss": 0.0651,
"step": 910
},
{
"epoch": 8.070175438596491,
"grad_norm": 0.10575652867555618,
"learning_rate": 6.549707602339182e-05,
"loss": 0.0115,
"step": 920
},
{
"epoch": 8.157894736842104,
"grad_norm": 0.8286623954772949,
"learning_rate": 6.403508771929825e-05,
"loss": 0.0087,
"step": 930
},
{
"epoch": 8.24561403508772,
"grad_norm": 0.39614301919937134,
"learning_rate": 6.257309941520468e-05,
"loss": 0.0198,
"step": 940
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.0022884588688611984,
"learning_rate": 6.111111111111112e-05,
"loss": 0.0006,
"step": 950
},
{
"epoch": 8.421052631578947,
"grad_norm": 0.06943880021572113,
"learning_rate": 5.9649122807017544e-05,
"loss": 0.0138,
"step": 960
},
{
"epoch": 8.508771929824562,
"grad_norm": 0.62754225730896,
"learning_rate": 5.818713450292398e-05,
"loss": 0.004,
"step": 970
},
{
"epoch": 8.596491228070175,
"grad_norm": 0.0023998187389224768,
"learning_rate": 5.6725146198830416e-05,
"loss": 0.0039,
"step": 980
},
{
"epoch": 8.68421052631579,
"grad_norm": 0.12702779471874237,
"learning_rate": 5.526315789473685e-05,
"loss": 0.0277,
"step": 990
},
{
"epoch": 8.771929824561404,
"grad_norm": 4.152740955352783,
"learning_rate": 5.3801169590643275e-05,
"loss": 0.0025,
"step": 1000
},
{
"epoch": 8.771929824561404,
"eval_accuracy": 0.9875389408099688,
"eval_loss": 0.05983072146773338,
"eval_runtime": 37.137,
"eval_samples_per_second": 8.644,
"eval_steps_per_second": 1.104,
"step": 1000
},
{
"epoch": 8.859649122807017,
"grad_norm": 0.05080701410770416,
"learning_rate": 5.233918128654971e-05,
"loss": 0.1165,
"step": 1010
},
{
"epoch": 8.947368421052632,
"grad_norm": 0.007482455112040043,
"learning_rate": 5.087719298245615e-05,
"loss": 0.0076,
"step": 1020
},
{
"epoch": 9.035087719298245,
"grad_norm": 4.2491483327466995e-05,
"learning_rate": 4.941520467836258e-05,
"loss": 0.0035,
"step": 1030
},
{
"epoch": 9.12280701754386,
"grad_norm": 0.017159271985292435,
"learning_rate": 4.7953216374269006e-05,
"loss": 0.0157,
"step": 1040
},
{
"epoch": 9.210526315789474,
"grad_norm": 0.1872653365135193,
"learning_rate": 4.649122807017544e-05,
"loss": 0.0113,
"step": 1050
},
{
"epoch": 9.298245614035087,
"grad_norm": 0.000704402569681406,
"learning_rate": 4.502923976608187e-05,
"loss": 0.0003,
"step": 1060
},
{
"epoch": 9.385964912280702,
"grad_norm": 0.938255250453949,
"learning_rate": 4.356725146198831e-05,
"loss": 0.015,
"step": 1070
},
{
"epoch": 9.473684210526315,
"grad_norm": 0.03299758583307266,
"learning_rate": 4.210526315789474e-05,
"loss": 0.0333,
"step": 1080
},
{
"epoch": 9.56140350877193,
"grad_norm": 3.074659824371338,
"learning_rate": 4.0643274853801174e-05,
"loss": 0.0159,
"step": 1090
},
{
"epoch": 9.649122807017545,
"grad_norm": 0.00028988588019274175,
"learning_rate": 3.9181286549707604e-05,
"loss": 0.0041,
"step": 1100
},
{
"epoch": 9.649122807017545,
"eval_accuracy": 0.9813084112149533,
"eval_loss": 0.12797725200653076,
"eval_runtime": 37.1154,
"eval_samples_per_second": 8.649,
"eval_steps_per_second": 1.105,
"step": 1100
},
{
"epoch": 9.736842105263158,
"grad_norm": 0.0019729495979845524,
"learning_rate": 3.771929824561404e-05,
"loss": 0.0362,
"step": 1110
},
{
"epoch": 9.824561403508772,
"grad_norm": 0.0025722056161612272,
"learning_rate": 3.625730994152047e-05,
"loss": 0.0009,
"step": 1120
},
{
"epoch": 9.912280701754385,
"grad_norm": 1.9475336074829102,
"learning_rate": 3.4795321637426905e-05,
"loss": 0.0368,
"step": 1130
},
{
"epoch": 10.0,
"grad_norm": 0.05613500997424126,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.032,
"step": 1140
},
{
"epoch": 10.087719298245615,
"grad_norm": 0.031211694702506065,
"learning_rate": 3.187134502923977e-05,
"loss": 0.0146,
"step": 1150
},
{
"epoch": 10.175438596491228,
"grad_norm": 1.126404881477356,
"learning_rate": 3.0409356725146197e-05,
"loss": 0.0035,
"step": 1160
},
{
"epoch": 10.263157894736842,
"grad_norm": 0.0027087063062936068,
"learning_rate": 2.8947368421052634e-05,
"loss": 0.0031,
"step": 1170
},
{
"epoch": 10.350877192982455,
"grad_norm": 0.012115487828850746,
"learning_rate": 2.7485380116959063e-05,
"loss": 0.0152,
"step": 1180
},
{
"epoch": 10.43859649122807,
"grad_norm": 0.0014923892449587584,
"learning_rate": 2.60233918128655e-05,
"loss": 0.0298,
"step": 1190
},
{
"epoch": 10.526315789473685,
"grad_norm": 10.097140312194824,
"learning_rate": 2.456140350877193e-05,
"loss": 0.0305,
"step": 1200
},
{
"epoch": 10.526315789473685,
"eval_accuracy": 0.9813084112149533,
"eval_loss": 0.0920475497841835,
"eval_runtime": 38.0741,
"eval_samples_per_second": 8.431,
"eval_steps_per_second": 1.077,
"step": 1200
},
{
"epoch": 10.614035087719298,
"grad_norm": 0.04899158701300621,
"learning_rate": 2.309941520467836e-05,
"loss": 0.0228,
"step": 1210
},
{
"epoch": 10.701754385964913,
"grad_norm": 1.95464289188385,
"learning_rate": 2.1637426900584794e-05,
"loss": 0.0053,
"step": 1220
},
{
"epoch": 10.789473684210526,
"grad_norm": 0.011684381403028965,
"learning_rate": 2.0175438596491227e-05,
"loss": 0.0057,
"step": 1230
},
{
"epoch": 10.87719298245614,
"grad_norm": 0.004223628900945187,
"learning_rate": 1.871345029239766e-05,
"loss": 0.0025,
"step": 1240
},
{
"epoch": 10.964912280701755,
"grad_norm": 0.018841328099370003,
"learning_rate": 1.7251461988304093e-05,
"loss": 0.0029,
"step": 1250
},
{
"epoch": 11.052631578947368,
"grad_norm": 0.0007397038862109184,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.1863,
"step": 1260
},
{
"epoch": 11.140350877192983,
"grad_norm": 0.12742939591407776,
"learning_rate": 1.4327485380116959e-05,
"loss": 0.0001,
"step": 1270
},
{
"epoch": 11.228070175438596,
"grad_norm": 0.05407753214240074,
"learning_rate": 1.2865497076023392e-05,
"loss": 0.0001,
"step": 1280
},
{
"epoch": 11.31578947368421,
"grad_norm": 1.0473092794418335,
"learning_rate": 1.1403508771929824e-05,
"loss": 0.0458,
"step": 1290
},
{
"epoch": 11.403508771929825,
"grad_norm": 1.6566870212554932,
"learning_rate": 9.941520467836257e-06,
"loss": 0.0148,
"step": 1300
},
{
"epoch": 11.403508771929825,
"eval_accuracy": 0.9875389408099688,
"eval_loss": 0.12092220783233643,
"eval_runtime": 36.0207,
"eval_samples_per_second": 8.912,
"eval_steps_per_second": 1.138,
"step": 1300
},
{
"epoch": 11.491228070175438,
"grad_norm": 0.0020481087267398834,
"learning_rate": 8.47953216374269e-06,
"loss": 0.0019,
"step": 1310
},
{
"epoch": 11.578947368421053,
"grad_norm": 0.064646415412426,
"learning_rate": 7.017543859649123e-06,
"loss": 0.0064,
"step": 1320
},
{
"epoch": 11.666666666666666,
"grad_norm": 0.0019295840756967664,
"learning_rate": 5.555555555555556e-06,
"loss": 0.0377,
"step": 1330
},
{
"epoch": 11.75438596491228,
"grad_norm": 0.25071534514427185,
"learning_rate": 4.093567251461989e-06,
"loss": 0.0035,
"step": 1340
},
{
"epoch": 11.842105263157894,
"grad_norm": 0.48753437399864197,
"learning_rate": 2.631578947368421e-06,
"loss": 0.0237,
"step": 1350
},
{
"epoch": 11.929824561403509,
"grad_norm": 0.01427147351205349,
"learning_rate": 1.1695906432748538e-06,
"loss": 0.0029,
"step": 1360
},
{
"epoch": 12.0,
"step": 1368,
"total_flos": 6.93495297202176e+16,
"train_loss": 0.044889834785714136,
"train_runtime": 3145.9622,
"train_samples_per_second": 6.916,
"train_steps_per_second": 0.435
}
],
"logging_steps": 10,
"max_steps": 1368,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.93495297202176e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}