|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9968, |
|
"eval_steps": 500, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0192, |
|
"grad_norm": 2.423678398132324, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.7932, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0384, |
|
"grad_norm": 2.6566708087921143, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.7613, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0576, |
|
"grad_norm": 2.337695837020874, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.8108, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0768, |
|
"grad_norm": 1.94878351688385, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.7694, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 1.4400192499160767, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.7506, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1152, |
|
"grad_norm": 1.514625906944275, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.6907, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1344, |
|
"grad_norm": 1.185624599456787, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.68, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1536, |
|
"grad_norm": 1.3777049779891968, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.6178, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.1728, |
|
"grad_norm": 1.2356456518173218, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.6484, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.9463790059089661, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.6111, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2112, |
|
"grad_norm": 1.3397246599197388, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6009, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.2304, |
|
"grad_norm": 1.0940124988555908, |
|
"learning_rate": 1.999429490929718e-05, |
|
"loss": 0.6054, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2496, |
|
"grad_norm": 0.9236961007118225, |
|
"learning_rate": 1.9977186146800707e-05, |
|
"loss": 0.5779, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2688, |
|
"grad_norm": 0.9376490712165833, |
|
"learning_rate": 1.994869323391895e-05, |
|
"loss": 0.5472, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 0.8171873688697815, |
|
"learning_rate": 1.990884868158239e-05, |
|
"loss": 0.5776, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3072, |
|
"grad_norm": 0.5843209624290466, |
|
"learning_rate": 1.985769795314804e-05, |
|
"loss": 0.5392, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3264, |
|
"grad_norm": 0.8455219864845276, |
|
"learning_rate": 1.9795299412524948e-05, |
|
"loss": 0.5566, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.3456, |
|
"grad_norm": 0.8293880224227905, |
|
"learning_rate": 1.9721724257579907e-05, |
|
"loss": 0.5294, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3648, |
|
"grad_norm": 0.5752674341201782, |
|
"learning_rate": 1.963705643889941e-05, |
|
"loss": 0.5143, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 0.6286266446113586, |
|
"learning_rate": 1.954139256400049e-05, |
|
"loss": 0.5348, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4032, |
|
"grad_norm": 0.6185891032218933, |
|
"learning_rate": 1.9434841787099804e-05, |
|
"loss": 0.5125, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4224, |
|
"grad_norm": 0.6374247074127197, |
|
"learning_rate": 1.9317525684566686e-05, |
|
"loss": 0.523, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.4416, |
|
"grad_norm": 0.5338121652603149, |
|
"learning_rate": 1.918957811620231e-05, |
|
"loss": 0.5475, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.4608, |
|
"grad_norm": 0.46796715259552, |
|
"learning_rate": 1.9051145072503216e-05, |
|
"loss": 0.5069, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.5485299825668335, |
|
"learning_rate": 1.8902384508083518e-05, |
|
"loss": 0.5207, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4992, |
|
"grad_norm": 0.4771216809749603, |
|
"learning_rate": 1.8743466161445823e-05, |
|
"loss": 0.4958, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.5184, |
|
"grad_norm": 0.41287821531295776, |
|
"learning_rate": 1.857457136130651e-05, |
|
"loss": 0.4893, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5376, |
|
"grad_norm": 0.42082297801971436, |
|
"learning_rate": 1.839589281969639e-05, |
|
"loss": 0.5023, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5568, |
|
"grad_norm": 0.44994717836380005, |
|
"learning_rate": 1.8207634412072765e-05, |
|
"loss": 0.4896, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.4245263934135437, |
|
"learning_rate": 1.8010010944693846e-05, |
|
"loss": 0.5175, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5952, |
|
"grad_norm": 0.3610984981060028, |
|
"learning_rate": 1.780324790952092e-05, |
|
"loss": 0.4903, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6144, |
|
"grad_norm": 0.32125937938690186, |
|
"learning_rate": 1.758758122692791e-05, |
|
"loss": 0.4788, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6336, |
|
"grad_norm": 0.382164865732193, |
|
"learning_rate": 1.7363256976511972e-05, |
|
"loss": 0.4968, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6528, |
|
"grad_norm": 0.33746933937072754, |
|
"learning_rate": 1.7130531116312202e-05, |
|
"loss": 0.4759, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 0.38712188601493835, |
|
"learning_rate": 1.688966919075687e-05, |
|
"loss": 0.4855, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.6912, |
|
"grad_norm": 0.3574568033218384, |
|
"learning_rate": 1.6640946027672395e-05, |
|
"loss": 0.4848, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7104, |
|
"grad_norm": 0.33736875653266907, |
|
"learning_rate": 1.6384645424699835e-05, |
|
"loss": 0.4924, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.7296, |
|
"grad_norm": 0.3253867030143738, |
|
"learning_rate": 1.612105982547663e-05, |
|
"loss": 0.4847, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.7488, |
|
"grad_norm": 0.40177786350250244, |
|
"learning_rate": 1.5850489985953076e-05, |
|
"loss": 0.4889, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 0.41558143496513367, |
|
"learning_rate": 1.5573244631224364e-05, |
|
"loss": 0.4797, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7872, |
|
"grad_norm": 0.35887712240219116, |
|
"learning_rate": 1.5289640103269626e-05, |
|
"loss": 0.4884, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8064, |
|
"grad_norm": 0.3083953559398651, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.4464, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.8256, |
|
"grad_norm": 0.3898048996925354, |
|
"learning_rate": 1.4704654806027558e-05, |
|
"loss": 0.4762, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.8448, |
|
"grad_norm": 0.3718198239803314, |
|
"learning_rate": 1.4403941515576344e-05, |
|
"loss": 0.4554, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 0.4089162349700928, |
|
"learning_rate": 1.4098203247965876e-05, |
|
"loss": 0.4893, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.8832, |
|
"grad_norm": 0.3150208592414856, |
|
"learning_rate": 1.3787788856105762e-05, |
|
"loss": 0.4668, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.9024, |
|
"grad_norm": 0.3481711447238922, |
|
"learning_rate": 1.3473052528448203e-05, |
|
"loss": 0.4757, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.9216, |
|
"grad_norm": 0.343963086605072, |
|
"learning_rate": 1.3154353384852559e-05, |
|
"loss": 0.487, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.9408, |
|
"grad_norm": 0.328693151473999, |
|
"learning_rate": 1.283205506682304e-05, |
|
"loss": 0.4862, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.3076961636543274, |
|
"learning_rate": 1.2506525322587207e-05, |
|
"loss": 0.4633, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9792, |
|
"grad_norm": 0.3493748605251312, |
|
"learning_rate": 1.2178135587488515e-05, |
|
"loss": 0.4542, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"grad_norm": 0.3048100769519806, |
|
"learning_rate": 1.1847260560171895e-05, |
|
"loss": 0.4683, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.0176, |
|
"grad_norm": 0.8771734237670898, |
|
"learning_rate": 1.1514277775045768e-05, |
|
"loss": 0.8037, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.0368, |
|
"grad_norm": 0.3285163640975952, |
|
"learning_rate": 1.1179567171508463e-05, |
|
"loss": 0.4256, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.056, |
|
"grad_norm": 0.3323395252227783, |
|
"learning_rate": 1.0843510660430447e-05, |
|
"loss": 0.3927, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0752, |
|
"grad_norm": 0.3365606963634491, |
|
"learning_rate": 1.0506491688387128e-05, |
|
"loss": 0.4463, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0944, |
|
"grad_norm": 0.31812021136283875, |
|
"learning_rate": 1.0168894800139311e-05, |
|
"loss": 0.4116, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.1136, |
|
"grad_norm": 0.38818055391311646, |
|
"learning_rate": 9.83110519986069e-06, |
|
"loss": 0.463, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.1328, |
|
"grad_norm": 0.30687442421913147, |
|
"learning_rate": 9.493508311612874e-06, |
|
"loss": 0.4379, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.152, |
|
"grad_norm": 0.2939879596233368, |
|
"learning_rate": 9.156489339569555e-06, |
|
"loss": 0.4133, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.1712, |
|
"grad_norm": 0.3425482511520386, |
|
"learning_rate": 8.820432828491542e-06, |
|
"loss": 0.4336, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.1904, |
|
"grad_norm": 0.2907508909702301, |
|
"learning_rate": 8.485722224954237e-06, |
|
"loss": 0.4075, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.2096, |
|
"grad_norm": 0.3222837448120117, |
|
"learning_rate": 8.15273943982811e-06, |
|
"loss": 0.3901, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.2288000000000001, |
|
"grad_norm": 0.31006520986557007, |
|
"learning_rate": 7.821864412511485e-06, |
|
"loss": 0.4606, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.248, |
|
"grad_norm": 0.264212042093277, |
|
"learning_rate": 7.493474677412795e-06, |
|
"loss": 0.3955, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.2671999999999999, |
|
"grad_norm": 0.27319130301475525, |
|
"learning_rate": 7.16794493317696e-06, |
|
"loss": 0.4068, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.2864, |
|
"grad_norm": 0.28153327107429504, |
|
"learning_rate": 6.845646615147445e-06, |
|
"loss": 0.4234, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.3056, |
|
"grad_norm": 0.3072468042373657, |
|
"learning_rate": 6.526947471551799e-06, |
|
"loss": 0.4408, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.3248, |
|
"grad_norm": 0.2795600891113281, |
|
"learning_rate": 6.21221114389424e-06, |
|
"loss": 0.3869, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.3439999999999999, |
|
"grad_norm": 0.2556418180465698, |
|
"learning_rate": 5.901796752034128e-06, |
|
"loss": 0.3938, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3632, |
|
"grad_norm": 0.28127527236938477, |
|
"learning_rate": 5.5960584844236565e-06, |
|
"loss": 0.4234, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.3824, |
|
"grad_norm": 0.2638583481311798, |
|
"learning_rate": 5.295345193972445e-06, |
|
"loss": 0.4703, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.4016, |
|
"grad_norm": 0.2525731325149536, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.3932, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.4208, |
|
"grad_norm": 0.25701409578323364, |
|
"learning_rate": 4.710359896730379e-06, |
|
"loss": 0.4027, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.29077157378196716, |
|
"learning_rate": 4.426755368775637e-06, |
|
"loss": 0.4311, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.4592, |
|
"grad_norm": 0.2595595717430115, |
|
"learning_rate": 4.149510014046922e-06, |
|
"loss": 0.376, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.4784, |
|
"grad_norm": 0.30198609828948975, |
|
"learning_rate": 3.878940174523371e-06, |
|
"loss": 0.4595, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.4976, |
|
"grad_norm": 0.2653771936893463, |
|
"learning_rate": 3.6153545753001663e-06, |
|
"loss": 0.3897, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.5168, |
|
"grad_norm": 0.25373411178588867, |
|
"learning_rate": 3.3590539723276083e-06, |
|
"loss": 0.4137, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.536, |
|
"grad_norm": 0.22587552666664124, |
|
"learning_rate": 3.110330809243134e-06, |
|
"loss": 0.3905, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5552000000000001, |
|
"grad_norm": 0.2660175859928131, |
|
"learning_rate": 2.869468883687798e-06, |
|
"loss": 0.4473, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.5744, |
|
"grad_norm": 0.23747386038303375, |
|
"learning_rate": 2.6367430234880286e-06, |
|
"loss": 0.3892, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.5936, |
|
"grad_norm": 0.22769302129745483, |
|
"learning_rate": 2.4124187730720916e-06, |
|
"loss": 0.4159, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.6128, |
|
"grad_norm": 0.27097997069358826, |
|
"learning_rate": 2.196752090479083e-06, |
|
"loss": 0.3836, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.6320000000000001, |
|
"grad_norm": 0.2471567988395691, |
|
"learning_rate": 1.9899890553061565e-06, |
|
"loss": 0.409, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.6512, |
|
"grad_norm": 0.263854056596756, |
|
"learning_rate": 1.7923655879272395e-06, |
|
"loss": 0.4006, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.6703999999999999, |
|
"grad_norm": 0.23444706201553345, |
|
"learning_rate": 1.60410718030361e-06, |
|
"loss": 0.3883, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.6896, |
|
"grad_norm": 0.23784613609313965, |
|
"learning_rate": 1.425428638693489e-06, |
|
"loss": 0.4117, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.7088, |
|
"grad_norm": 0.25254783034324646, |
|
"learning_rate": 1.2565338385541792e-06, |
|
"loss": 0.4345, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.728, |
|
"grad_norm": 0.2234947681427002, |
|
"learning_rate": 1.097615491916485e-06, |
|
"loss": 0.4002, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7471999999999999, |
|
"grad_norm": 0.2332143634557724, |
|
"learning_rate": 9.488549274967873e-07, |
|
"loss": 0.4242, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.7664, |
|
"grad_norm": 0.21657587587833405, |
|
"learning_rate": 8.10421883797694e-07, |
|
"loss": 0.3806, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.7856, |
|
"grad_norm": 0.20751075446605682, |
|
"learning_rate": 6.824743154333157e-07, |
|
"loss": 0.4056, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.8048, |
|
"grad_norm": 0.2177955061197281, |
|
"learning_rate": 5.651582129001987e-07, |
|
"loss": 0.3898, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.8239999999999998, |
|
"grad_norm": 0.22828799486160278, |
|
"learning_rate": 4.5860743599951186e-07, |
|
"loss": 0.3878, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.8432, |
|
"grad_norm": 0.2466111183166504, |
|
"learning_rate": 3.629435611005916e-07, |
|
"loss": 0.4132, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.8624, |
|
"grad_norm": 0.21871791779994965, |
|
"learning_rate": 2.7827574242009434e-07, |
|
"loss": 0.3657, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.8816000000000002, |
|
"grad_norm": 0.2354569435119629, |
|
"learning_rate": 2.0470058747505516e-07, |
|
"loss": 0.4006, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.9008, |
|
"grad_norm": 0.20780694484710693, |
|
"learning_rate": 1.4230204685196202e-07, |
|
"loss": 0.3919, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.19831795990467072, |
|
"learning_rate": 9.11513184176116e-08, |
|
"loss": 0.4231, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9392, |
|
"grad_norm": 0.21838048100471497, |
|
"learning_rate": 5.1306766081048456e-08, |
|
"loss": 0.3911, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.9584000000000001, |
|
"grad_norm": 0.21957120299339294, |
|
"learning_rate": 2.2813853199292745e-08, |
|
"loss": 0.4391, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.9776, |
|
"grad_norm": 0.22633568942546844, |
|
"learning_rate": 5.705090702819993e-09, |
|
"loss": 0.4069, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.9968, |
|
"grad_norm": 0.2150619626045227, |
|
"learning_rate": 0.0, |
|
"loss": 0.4599, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.9968, |
|
"step": 104, |
|
"total_flos": 356056961384448.0, |
|
"train_loss": 0.48249776661396027, |
|
"train_runtime": 36791.677, |
|
"train_samples_per_second": 0.272, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 356056961384448.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|