|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 18025, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011095700416088765, |
|
"grad_norm": 0.6721351742744446, |
|
"learning_rate": 2.7739251040221912e-09, |
|
"loss": 1.1791, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002219140083217753, |
|
"grad_norm": 0.48403581976890564, |
|
"learning_rate": 5.5478502080443824e-09, |
|
"loss": 1.1797, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00332871012482663, |
|
"grad_norm": 0.328329473733902, |
|
"learning_rate": 8.321775312066573e-09, |
|
"loss": 1.1276, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004438280166435506, |
|
"grad_norm": 0.573009192943573, |
|
"learning_rate": 1.1095700416088765e-08, |
|
"loss": 1.1502, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005547850208044383, |
|
"grad_norm": 0.7344629764556885, |
|
"learning_rate": 1.3869625520110957e-08, |
|
"loss": 1.1745, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00665742024965326, |
|
"grad_norm": 0.4872736632823944, |
|
"learning_rate": 1.6643550624133146e-08, |
|
"loss": 1.1419, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.007766990291262136, |
|
"grad_norm": 0.42412033677101135, |
|
"learning_rate": 1.9417475728155338e-08, |
|
"loss": 1.1833, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.008876560332871012, |
|
"grad_norm": 0.41351547837257385, |
|
"learning_rate": 2.219140083217753e-08, |
|
"loss": 1.1966, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.009986130374479889, |
|
"grad_norm": 0.5638367533683777, |
|
"learning_rate": 2.4965325936199722e-08, |
|
"loss": 1.1935, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.011095700416088766, |
|
"grad_norm": 0.35621383786201477, |
|
"learning_rate": 2.7739251040221914e-08, |
|
"loss": 1.1403, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.012205270457697643, |
|
"grad_norm": 0.519483208656311, |
|
"learning_rate": 3.0513176144244106e-08, |
|
"loss": 1.1716, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01331484049930652, |
|
"grad_norm": 0.655170202255249, |
|
"learning_rate": 3.328710124826629e-08, |
|
"loss": 1.187, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.014424410540915394, |
|
"grad_norm": 0.30613973736763, |
|
"learning_rate": 3.606102635228848e-08, |
|
"loss": 1.1639, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.015533980582524271, |
|
"grad_norm": 0.6322771310806274, |
|
"learning_rate": 3.8834951456310675e-08, |
|
"loss": 1.1547, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.016643550624133148, |
|
"grad_norm": 0.622188925743103, |
|
"learning_rate": 4.1608876560332874e-08, |
|
"loss": 1.1878, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.017753120665742025, |
|
"grad_norm": 0.636248767375946, |
|
"learning_rate": 4.438280166435506e-08, |
|
"loss": 1.1298, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0188626907073509, |
|
"grad_norm": 0.5820329189300537, |
|
"learning_rate": 4.715672676837725e-08, |
|
"loss": 1.1284, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.019972260748959778, |
|
"grad_norm": 0.40758267045021057, |
|
"learning_rate": 4.9930651872399443e-08, |
|
"loss": 1.2361, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.021081830790568655, |
|
"grad_norm": 0.4012329578399658, |
|
"learning_rate": 5.270457697642163e-08, |
|
"loss": 1.1518, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.022191400832177532, |
|
"grad_norm": 0.5331623554229736, |
|
"learning_rate": 5.547850208044383e-08, |
|
"loss": 1.2203, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02330097087378641, |
|
"grad_norm": 0.5145316123962402, |
|
"learning_rate": 5.825242718446602e-08, |
|
"loss": 1.1253, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.024410540915395285, |
|
"grad_norm": 0.36710885167121887, |
|
"learning_rate": 6.102635228848821e-08, |
|
"loss": 1.0685, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.025520110957004162, |
|
"grad_norm": 0.3183213472366333, |
|
"learning_rate": 6.38002773925104e-08, |
|
"loss": 1.2097, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.02662968099861304, |
|
"grad_norm": 0.3870026171207428, |
|
"learning_rate": 6.657420249653258e-08, |
|
"loss": 1.1605, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.027739251040221916, |
|
"grad_norm": 0.5289874076843262, |
|
"learning_rate": 6.934812760055478e-08, |
|
"loss": 1.1163, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02884882108183079, |
|
"grad_norm": 0.6247478127479553, |
|
"learning_rate": 7.212205270457697e-08, |
|
"loss": 1.2463, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.029958391123439666, |
|
"grad_norm": 0.5780977010726929, |
|
"learning_rate": 7.489597780859917e-08, |
|
"loss": 1.195, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.031067961165048542, |
|
"grad_norm": 0.5350005626678467, |
|
"learning_rate": 7.766990291262135e-08, |
|
"loss": 1.1289, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.03217753120665742, |
|
"grad_norm": 0.30264171957969666, |
|
"learning_rate": 8.044382801664355e-08, |
|
"loss": 1.0648, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.033287101248266296, |
|
"grad_norm": 0.45016685128211975, |
|
"learning_rate": 8.321775312066575e-08, |
|
"loss": 1.153, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.034396671289875176, |
|
"grad_norm": 0.5521411895751953, |
|
"learning_rate": 8.599167822468793e-08, |
|
"loss": 1.1257, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.03550624133148405, |
|
"grad_norm": 0.4235968291759491, |
|
"learning_rate": 8.876560332871012e-08, |
|
"loss": 1.1198, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03661581137309293, |
|
"grad_norm": 0.5764958262443542, |
|
"learning_rate": 9.153952843273232e-08, |
|
"loss": 1.1015, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.0377253814147018, |
|
"grad_norm": 0.5378324389457703, |
|
"learning_rate": 9.43134535367545e-08, |
|
"loss": 1.2342, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.038834951456310676, |
|
"grad_norm": 0.3303524851799011, |
|
"learning_rate": 9.708737864077669e-08, |
|
"loss": 1.1156, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.039944521497919556, |
|
"grad_norm": 0.5583963394165039, |
|
"learning_rate": 9.986130374479889e-08, |
|
"loss": 1.1579, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.04105409153952843, |
|
"grad_norm": 0.813584566116333, |
|
"learning_rate": 1.0263522884882107e-07, |
|
"loss": 1.1749, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.04216366158113731, |
|
"grad_norm": 0.6232322454452515, |
|
"learning_rate": 1.0540915395284326e-07, |
|
"loss": 1.195, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.04327323162274618, |
|
"grad_norm": 0.5598679780960083, |
|
"learning_rate": 1.0818307905686546e-07, |
|
"loss": 1.1183, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.044382801664355064, |
|
"grad_norm": 0.5374318361282349, |
|
"learning_rate": 1.1095700416088766e-07, |
|
"loss": 1.1944, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04549237170596394, |
|
"grad_norm": 0.19993217289447784, |
|
"learning_rate": 1.1373092926490985e-07, |
|
"loss": 1.0835, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.04660194174757282, |
|
"grad_norm": 0.4393330514431, |
|
"learning_rate": 1.1650485436893204e-07, |
|
"loss": 1.1977, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.04771151178918169, |
|
"grad_norm": 0.6222187876701355, |
|
"learning_rate": 1.1927877947295422e-07, |
|
"loss": 1.0523, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.04882108183079057, |
|
"grad_norm": 0.319072425365448, |
|
"learning_rate": 1.2205270457697642e-07, |
|
"loss": 1.136, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.049930651872399444, |
|
"grad_norm": 0.5406301617622375, |
|
"learning_rate": 1.248266296809986e-07, |
|
"loss": 1.2155, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.051040221914008324, |
|
"grad_norm": 0.618211567401886, |
|
"learning_rate": 1.276005547850208e-07, |
|
"loss": 1.1562, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.0521497919556172, |
|
"grad_norm": 0.3475450277328491, |
|
"learning_rate": 1.30374479889043e-07, |
|
"loss": 1.1551, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.05325936199722608, |
|
"grad_norm": 0.4210108518600464, |
|
"learning_rate": 1.3314840499306516e-07, |
|
"loss": 1.1612, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05436893203883495, |
|
"grad_norm": 0.386306494474411, |
|
"learning_rate": 1.3592233009708736e-07, |
|
"loss": 1.2195, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.05547850208044383, |
|
"grad_norm": 0.42582884430885315, |
|
"learning_rate": 1.3869625520110956e-07, |
|
"loss": 1.1139, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.056588072122052704, |
|
"grad_norm": 0.3357681930065155, |
|
"learning_rate": 1.4147018030513176e-07, |
|
"loss": 1.1757, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.05769764216366158, |
|
"grad_norm": 0.2897900938987732, |
|
"learning_rate": 1.4424410540915393e-07, |
|
"loss": 1.2098, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.05880721220527046, |
|
"grad_norm": 0.38025063276290894, |
|
"learning_rate": 1.4701803051317613e-07, |
|
"loss": 1.1234, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.05991678224687933, |
|
"grad_norm": 0.36913609504699707, |
|
"learning_rate": 1.4979195561719833e-07, |
|
"loss": 1.1552, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.06102635228848821, |
|
"grad_norm": 0.5480925440788269, |
|
"learning_rate": 1.525658807212205e-07, |
|
"loss": 1.135, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.062135922330097085, |
|
"grad_norm": 0.3799718916416168, |
|
"learning_rate": 1.553398058252427e-07, |
|
"loss": 1.1454, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.06324549237170596, |
|
"grad_norm": 0.3841489255428314, |
|
"learning_rate": 1.5811373092926493e-07, |
|
"loss": 1.1351, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.06435506241331485, |
|
"grad_norm": 0.29666101932525635, |
|
"learning_rate": 1.608876560332871e-07, |
|
"loss": 1.1616, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.06546463245492372, |
|
"grad_norm": 0.2889375686645508, |
|
"learning_rate": 1.636615811373093e-07, |
|
"loss": 1.1802, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.06657420249653259, |
|
"grad_norm": 0.5737839937210083, |
|
"learning_rate": 1.664355062413315e-07, |
|
"loss": 1.1667, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.06768377253814147, |
|
"grad_norm": 0.5003082156181335, |
|
"learning_rate": 1.6920943134535367e-07, |
|
"loss": 1.075, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.06879334257975035, |
|
"grad_norm": 0.46454185247421265, |
|
"learning_rate": 1.7198335644937587e-07, |
|
"loss": 1.1724, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.06990291262135923, |
|
"grad_norm": 0.32240554690361023, |
|
"learning_rate": 1.7475728155339807e-07, |
|
"loss": 1.1437, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.0710124826629681, |
|
"grad_norm": 0.42182767391204834, |
|
"learning_rate": 1.7753120665742024e-07, |
|
"loss": 0.9911, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.07212205270457697, |
|
"grad_norm": 0.4385708272457123, |
|
"learning_rate": 1.8030513176144244e-07, |
|
"loss": 1.0787, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.07323162274618586, |
|
"grad_norm": 0.3282943367958069, |
|
"learning_rate": 1.8307905686546463e-07, |
|
"loss": 1.1217, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.07434119278779473, |
|
"grad_norm": 0.7223221063613892, |
|
"learning_rate": 1.858529819694868e-07, |
|
"loss": 1.1546, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.0754507628294036, |
|
"grad_norm": 0.36656028032302856, |
|
"learning_rate": 1.88626907073509e-07, |
|
"loss": 1.163, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.07656033287101248, |
|
"grad_norm": 0.5122601389884949, |
|
"learning_rate": 1.914008321775312e-07, |
|
"loss": 1.053, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.07766990291262135, |
|
"grad_norm": 0.5633952021598816, |
|
"learning_rate": 1.9417475728155338e-07, |
|
"loss": 1.1306, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.07877947295423024, |
|
"grad_norm": 0.5684695243835449, |
|
"learning_rate": 1.9694868238557558e-07, |
|
"loss": 1.1589, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.07988904299583911, |
|
"grad_norm": 0.6536048054695129, |
|
"learning_rate": 1.9972260748959777e-07, |
|
"loss": 1.113, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.08099861303744799, |
|
"grad_norm": 0.4558582901954651, |
|
"learning_rate": 2.0249653259361995e-07, |
|
"loss": 1.0757, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.08210818307905686, |
|
"grad_norm": 0.6440749168395996, |
|
"learning_rate": 2.0527045769764214e-07, |
|
"loss": 1.1674, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.08321775312066575, |
|
"grad_norm": 0.41806939244270325, |
|
"learning_rate": 2.0804438280166434e-07, |
|
"loss": 1.0539, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08432732316227462, |
|
"grad_norm": 0.5335156321525574, |
|
"learning_rate": 2.1081830790568652e-07, |
|
"loss": 1.1338, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.0854368932038835, |
|
"grad_norm": 0.35202693939208984, |
|
"learning_rate": 2.1359223300970871e-07, |
|
"loss": 1.1638, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.08654646324549237, |
|
"grad_norm": 0.45304250717163086, |
|
"learning_rate": 2.163661581137309e-07, |
|
"loss": 1.0912, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.08765603328710125, |
|
"grad_norm": 0.37071916460990906, |
|
"learning_rate": 2.191400832177531e-07, |
|
"loss": 1.0883, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.08876560332871013, |
|
"grad_norm": 0.5757469534873962, |
|
"learning_rate": 2.219140083217753e-07, |
|
"loss": 1.0081, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.089875173370319, |
|
"grad_norm": 0.4922785460948944, |
|
"learning_rate": 2.246879334257975e-07, |
|
"loss": 1.0775, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.09098474341192787, |
|
"grad_norm": 0.6358697414398193, |
|
"learning_rate": 2.274618585298197e-07, |
|
"loss": 1.1156, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.09209431345353676, |
|
"grad_norm": 0.33512547612190247, |
|
"learning_rate": 2.3023578363384188e-07, |
|
"loss": 1.049, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.09320388349514563, |
|
"grad_norm": 0.3588186502456665, |
|
"learning_rate": 2.3300970873786408e-07, |
|
"loss": 1.0028, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.09431345353675451, |
|
"grad_norm": 0.4455154836177826, |
|
"learning_rate": 2.3578363384188628e-07, |
|
"loss": 0.9894, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.09542302357836338, |
|
"grad_norm": 0.4005114734172821, |
|
"learning_rate": 2.3855755894590845e-07, |
|
"loss": 1.0441, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.09653259361997225, |
|
"grad_norm": 0.3630480170249939, |
|
"learning_rate": 2.413314840499306e-07, |
|
"loss": 1.1274, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.09764216366158114, |
|
"grad_norm": 0.38374799489974976, |
|
"learning_rate": 2.4410540915395285e-07, |
|
"loss": 1.0205, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.09875173370319001, |
|
"grad_norm": 0.240658700466156, |
|
"learning_rate": 2.46879334257975e-07, |
|
"loss": 0.9917, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.09986130374479889, |
|
"grad_norm": 0.32337549328804016, |
|
"learning_rate": 2.496532593619972e-07, |
|
"loss": 1.0758, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.10097087378640776, |
|
"grad_norm": 0.47185397148132324, |
|
"learning_rate": 2.524271844660194e-07, |
|
"loss": 1.1119, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.10208044382801665, |
|
"grad_norm": 0.5956501960754395, |
|
"learning_rate": 2.552011095700416e-07, |
|
"loss": 1.043, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.10319001386962552, |
|
"grad_norm": 0.36230626702308655, |
|
"learning_rate": 2.5797503467406376e-07, |
|
"loss": 1.0284, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.1042995839112344, |
|
"grad_norm": 0.4904063642024994, |
|
"learning_rate": 2.60748959778086e-07, |
|
"loss": 1.0458, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.10540915395284327, |
|
"grad_norm": 0.3035784661769867, |
|
"learning_rate": 2.6352288488210816e-07, |
|
"loss": 1.0256, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.10651872399445216, |
|
"grad_norm": 0.5605130791664124, |
|
"learning_rate": 2.6629680998613033e-07, |
|
"loss": 1.0233, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.10762829403606103, |
|
"grad_norm": 0.3493014872074127, |
|
"learning_rate": 2.6907073509015255e-07, |
|
"loss": 1.0023, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.1087378640776699, |
|
"grad_norm": 0.5957789421081543, |
|
"learning_rate": 2.7184466019417473e-07, |
|
"loss": 1.0315, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.10984743411927878, |
|
"grad_norm": 0.6720208525657654, |
|
"learning_rate": 2.746185852981969e-07, |
|
"loss": 1.0138, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.11095700416088766, |
|
"grad_norm": 0.43460479378700256, |
|
"learning_rate": 2.773925104022191e-07, |
|
"loss": 0.9863, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11206657420249654, |
|
"grad_norm": 0.5312954783439636, |
|
"learning_rate": 2.801664355062413e-07, |
|
"loss": 1.0486, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.11317614424410541, |
|
"grad_norm": 0.6359843611717224, |
|
"learning_rate": 2.829403606102635e-07, |
|
"loss": 0.9457, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 0.6254010796546936, |
|
"learning_rate": 2.857142857142857e-07, |
|
"loss": 1.1109, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.11539528432732316, |
|
"grad_norm": 0.36780887842178345, |
|
"learning_rate": 2.8848821081830787e-07, |
|
"loss": 1.0425, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.11650485436893204, |
|
"grad_norm": 0.46995213627815247, |
|
"learning_rate": 2.912621359223301e-07, |
|
"loss": 0.9666, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.11761442441054092, |
|
"grad_norm": 0.49816495180130005, |
|
"learning_rate": 2.9403606102635226e-07, |
|
"loss": 0.9935, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.11872399445214979, |
|
"grad_norm": 0.9470138549804688, |
|
"learning_rate": 2.9680998613037444e-07, |
|
"loss": 1.0937, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.11983356449375866, |
|
"grad_norm": 0.4583646357059479, |
|
"learning_rate": 2.9958391123439666e-07, |
|
"loss": 0.9972, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.12094313453536755, |
|
"grad_norm": 0.351114958524704, |
|
"learning_rate": 3.0235783633841883e-07, |
|
"loss": 1.0061, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.12205270457697642, |
|
"grad_norm": 0.4335211515426636, |
|
"learning_rate": 3.05131761442441e-07, |
|
"loss": 1.0091, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.1231622746185853, |
|
"grad_norm": 0.3483923673629761, |
|
"learning_rate": 3.0790568654646323e-07, |
|
"loss": 1.0092, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.12427184466019417, |
|
"grad_norm": 0.33233171701431274, |
|
"learning_rate": 3.106796116504854e-07, |
|
"loss": 1.0185, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.12538141470180306, |
|
"grad_norm": 0.34460940957069397, |
|
"learning_rate": 3.1345353675450763e-07, |
|
"loss": 0.9484, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.12649098474341192, |
|
"grad_norm": 0.42694029211997986, |
|
"learning_rate": 3.1622746185852985e-07, |
|
"loss": 0.9484, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.1276005547850208, |
|
"grad_norm": 0.5112186074256897, |
|
"learning_rate": 3.19001386962552e-07, |
|
"loss": 1.0039, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.1287101248266297, |
|
"grad_norm": 0.4560784101486206, |
|
"learning_rate": 3.217753120665742e-07, |
|
"loss": 0.9961, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.12981969486823855, |
|
"grad_norm": 0.5053315162658691, |
|
"learning_rate": 3.245492371705964e-07, |
|
"loss": 0.9542, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.13092926490984744, |
|
"grad_norm": 0.6796769499778748, |
|
"learning_rate": 3.273231622746186e-07, |
|
"loss": 0.9187, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.13203883495145632, |
|
"grad_norm": 0.7734983563423157, |
|
"learning_rate": 3.3009708737864077e-07, |
|
"loss": 0.9298, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.13314840499306518, |
|
"grad_norm": 0.4399431049823761, |
|
"learning_rate": 3.32871012482663e-07, |
|
"loss": 0.8959, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.13425797503467407, |
|
"grad_norm": 0.4783932864665985, |
|
"learning_rate": 3.3564493758668516e-07, |
|
"loss": 0.8539, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.13536754507628293, |
|
"grad_norm": 0.4672847092151642, |
|
"learning_rate": 3.3841886269070734e-07, |
|
"loss": 0.8839, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.13647711511789182, |
|
"grad_norm": 0.4219910204410553, |
|
"learning_rate": 3.4119278779472956e-07, |
|
"loss": 0.934, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.1375866851595007, |
|
"grad_norm": 0.3283788561820984, |
|
"learning_rate": 3.4396671289875173e-07, |
|
"loss": 0.8729, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.13869625520110956, |
|
"grad_norm": 0.6127363443374634, |
|
"learning_rate": 3.467406380027739e-07, |
|
"loss": 0.8355, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.13980582524271845, |
|
"grad_norm": 0.8837600350379944, |
|
"learning_rate": 3.4951456310679613e-07, |
|
"loss": 0.9256, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.1409153952843273, |
|
"grad_norm": 0.3368714153766632, |
|
"learning_rate": 3.522884882108183e-07, |
|
"loss": 0.8326, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.1420249653259362, |
|
"grad_norm": 0.6457244753837585, |
|
"learning_rate": 3.550624133148405e-07, |
|
"loss": 0.8638, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.14313453536754508, |
|
"grad_norm": 0.5497669577598572, |
|
"learning_rate": 3.578363384188627e-07, |
|
"loss": 0.8049, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.14424410540915394, |
|
"grad_norm": 0.5958977341651917, |
|
"learning_rate": 3.6061026352288487e-07, |
|
"loss": 0.8107, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.14535367545076283, |
|
"grad_norm": 0.5878711938858032, |
|
"learning_rate": 3.6338418862690704e-07, |
|
"loss": 0.8011, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.14646324549237172, |
|
"grad_norm": 0.4262014627456665, |
|
"learning_rate": 3.6615811373092927e-07, |
|
"loss": 0.898, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.14757281553398058, |
|
"grad_norm": 0.7306149005889893, |
|
"learning_rate": 3.6893203883495144e-07, |
|
"loss": 0.7726, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.14868238557558947, |
|
"grad_norm": 0.32822510600090027, |
|
"learning_rate": 3.717059639389736e-07, |
|
"loss": 0.775, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.14979195561719832, |
|
"grad_norm": 0.41548779606819153, |
|
"learning_rate": 3.7447988904299584e-07, |
|
"loss": 0.7613, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.1509015256588072, |
|
"grad_norm": 1.1839288473129272, |
|
"learning_rate": 3.77253814147018e-07, |
|
"loss": 0.8496, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.1520110957004161, |
|
"grad_norm": 0.5219757556915283, |
|
"learning_rate": 3.800277392510402e-07, |
|
"loss": 0.7402, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.15312066574202496, |
|
"grad_norm": 0.8173393607139587, |
|
"learning_rate": 3.828016643550624e-07, |
|
"loss": 0.7204, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.15423023578363385, |
|
"grad_norm": 0.49754881858825684, |
|
"learning_rate": 3.855755894590846e-07, |
|
"loss": 0.7716, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.1553398058252427, |
|
"grad_norm": 0.39697808027267456, |
|
"learning_rate": 3.8834951456310675e-07, |
|
"loss": 0.7791, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1564493758668516, |
|
"grad_norm": 0.6214376091957092, |
|
"learning_rate": 3.91123439667129e-07, |
|
"loss": 0.6724, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.15755894590846048, |
|
"grad_norm": 0.6486151218414307, |
|
"learning_rate": 3.9389736477115115e-07, |
|
"loss": 0.8015, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.15866851595006934, |
|
"grad_norm": 0.5499553084373474, |
|
"learning_rate": 3.966712898751733e-07, |
|
"loss": 0.7871, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.15977808599167823, |
|
"grad_norm": 0.8797757029533386, |
|
"learning_rate": 3.9944521497919555e-07, |
|
"loss": 0.7183, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.1608876560332871, |
|
"grad_norm": 0.47135302424430847, |
|
"learning_rate": 4.022191400832177e-07, |
|
"loss": 0.7348, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.16199722607489597, |
|
"grad_norm": 0.8005576729774475, |
|
"learning_rate": 4.049930651872399e-07, |
|
"loss": 0.6212, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.16310679611650486, |
|
"grad_norm": 0.47837623953819275, |
|
"learning_rate": 4.077669902912621e-07, |
|
"loss": 0.6812, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.16421636615811372, |
|
"grad_norm": 0.36638781428337097, |
|
"learning_rate": 4.105409153952843e-07, |
|
"loss": 0.6925, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.1653259361997226, |
|
"grad_norm": 0.817538857460022, |
|
"learning_rate": 4.1331484049930646e-07, |
|
"loss": 0.6186, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.1664355062413315, |
|
"grad_norm": 0.5090010166168213, |
|
"learning_rate": 4.160887656033287e-07, |
|
"loss": 0.6844, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16754507628294035, |
|
"grad_norm": 0.6102781295776367, |
|
"learning_rate": 4.1886269070735086e-07, |
|
"loss": 0.6943, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.16865464632454924, |
|
"grad_norm": 0.8231751918792725, |
|
"learning_rate": 4.2163661581137303e-07, |
|
"loss": 0.6391, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.16976421636615813, |
|
"grad_norm": 0.38910776376724243, |
|
"learning_rate": 4.2441054091539526e-07, |
|
"loss": 0.6937, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.170873786407767, |
|
"grad_norm": 0.5838291049003601, |
|
"learning_rate": 4.2718446601941743e-07, |
|
"loss": 0.5791, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.17198335644937587, |
|
"grad_norm": 0.519530177116394, |
|
"learning_rate": 4.299583911234396e-07, |
|
"loss": 0.7412, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.17309292649098473, |
|
"grad_norm": 0.45696595311164856, |
|
"learning_rate": 4.327323162274618e-07, |
|
"loss": 0.6469, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.17420249653259362, |
|
"grad_norm": 0.6771582961082458, |
|
"learning_rate": 4.35506241331484e-07, |
|
"loss": 0.6441, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.1753120665742025, |
|
"grad_norm": 0.559917151927948, |
|
"learning_rate": 4.382801664355062e-07, |
|
"loss": 0.5778, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.17642163661581137, |
|
"grad_norm": 0.9249961376190186, |
|
"learning_rate": 4.4105409153952845e-07, |
|
"loss": 0.6637, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.17753120665742025, |
|
"grad_norm": 0.5211077928543091, |
|
"learning_rate": 4.438280166435506e-07, |
|
"loss": 0.7047, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.1786407766990291, |
|
"grad_norm": 0.7488894462585449, |
|
"learning_rate": 4.4660194174757285e-07, |
|
"loss": 0.5802, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.179750346740638, |
|
"grad_norm": 0.6046866774559021, |
|
"learning_rate": 4.49375866851595e-07, |
|
"loss": 0.6601, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.1808599167822469, |
|
"grad_norm": 0.3715108036994934, |
|
"learning_rate": 4.521497919556172e-07, |
|
"loss": 0.6094, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.18196948682385575, |
|
"grad_norm": 0.5831759572029114, |
|
"learning_rate": 4.549237170596394e-07, |
|
"loss": 0.6086, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.18307905686546463, |
|
"grad_norm": 0.595746636390686, |
|
"learning_rate": 4.576976421636616e-07, |
|
"loss": 0.5916, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.18418862690707352, |
|
"grad_norm": 0.48339492082595825, |
|
"learning_rate": 4.6047156726768376e-07, |
|
"loss": 0.6127, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.18529819694868238, |
|
"grad_norm": 1.626143455505371, |
|
"learning_rate": 4.63245492371706e-07, |
|
"loss": 0.5437, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.18640776699029127, |
|
"grad_norm": 0.3789680004119873, |
|
"learning_rate": 4.6601941747572816e-07, |
|
"loss": 0.6499, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.18751733703190013, |
|
"grad_norm": 0.5178479552268982, |
|
"learning_rate": 4.6879334257975033e-07, |
|
"loss": 0.6509, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.18862690707350901, |
|
"grad_norm": 0.5709561109542847, |
|
"learning_rate": 4.7156726768377255e-07, |
|
"loss": 0.6429, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1897364771151179, |
|
"grad_norm": 0.3643471896648407, |
|
"learning_rate": 4.743411927877947e-07, |
|
"loss": 0.6235, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.19084604715672676, |
|
"grad_norm": 0.5804113745689392, |
|
"learning_rate": 4.771151178918169e-07, |
|
"loss": 0.7341, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.19195561719833565, |
|
"grad_norm": 0.5089621543884277, |
|
"learning_rate": 4.798890429958391e-07, |
|
"loss": 0.6286, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.1930651872399445, |
|
"grad_norm": 0.4625658392906189, |
|
"learning_rate": 4.826629680998612e-07, |
|
"loss": 0.6128, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.1941747572815534, |
|
"grad_norm": 0.36961832642555237, |
|
"learning_rate": 4.854368932038835e-07, |
|
"loss": 0.6213, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.19528432732316228, |
|
"grad_norm": 0.4466856122016907, |
|
"learning_rate": 4.882108183079057e-07, |
|
"loss": 0.5383, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.19639389736477114, |
|
"grad_norm": 0.45287024974823, |
|
"learning_rate": 4.909847434119279e-07, |
|
"loss": 0.5064, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.19750346740638003, |
|
"grad_norm": 0.6351368427276611, |
|
"learning_rate": 4.9375866851595e-07, |
|
"loss": 0.5485, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.19861303744798892, |
|
"grad_norm": 0.46472978591918945, |
|
"learning_rate": 4.965325936199722e-07, |
|
"loss": 0.5392, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.19972260748959778, |
|
"grad_norm": 0.38963034749031067, |
|
"learning_rate": 4.993065187239944e-07, |
|
"loss": 0.5459, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.20083217753120666, |
|
"grad_norm": 2.1769580841064453, |
|
"learning_rate": 4.999986650611594e-07, |
|
"loss": 0.5158, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.20194174757281552, |
|
"grad_norm": 0.6485143899917603, |
|
"learning_rate": 4.999927320283929e-07, |
|
"loss": 0.5815, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2030513176144244, |
|
"grad_norm": 0.37338986992836, |
|
"learning_rate": 4.999820526876891e-07, |
|
"loss": 0.5475, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.2041608876560333, |
|
"grad_norm": 0.4554106593132019, |
|
"learning_rate": 4.999666272418033e-07, |
|
"loss": 0.547, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.20527045769764216, |
|
"grad_norm": 0.3950905501842499, |
|
"learning_rate": 4.999464559835997e-07, |
|
"loss": 0.5561, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.20638002773925104, |
|
"grad_norm": 0.28335675597190857, |
|
"learning_rate": 4.999215392960455e-07, |
|
"loss": 0.6461, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.20748959778085993, |
|
"grad_norm": 0.3045244514942169, |
|
"learning_rate": 4.998918776522036e-07, |
|
"loss": 0.5206, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.2085991678224688, |
|
"grad_norm": 0.4339936375617981, |
|
"learning_rate": 4.998574716152234e-07, |
|
"loss": 0.4728, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.20970873786407768, |
|
"grad_norm": 0.39513078331947327, |
|
"learning_rate": 4.998183218383305e-07, |
|
"loss": 0.5485, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.21081830790568654, |
|
"grad_norm": 0.40521058440208435, |
|
"learning_rate": 4.997744290648143e-07, |
|
"loss": 0.6388, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.21192787794729542, |
|
"grad_norm": 0.3975263833999634, |
|
"learning_rate": 4.997257941280133e-07, |
|
"loss": 0.5521, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.2130374479889043, |
|
"grad_norm": 0.3691408634185791, |
|
"learning_rate": 4.996724179512999e-07, |
|
"loss": 0.5293, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.21414701803051317, |
|
"grad_norm": 0.30931538343429565, |
|
"learning_rate": 4.996143015480629e-07, |
|
"loss": 0.6779, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.21525658807212206, |
|
"grad_norm": 0.4070769250392914, |
|
"learning_rate": 4.995514460216873e-07, |
|
"loss": 0.4724, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.21636615811373092, |
|
"grad_norm": 0.34178927540779114, |
|
"learning_rate": 4.994838525655349e-07, |
|
"loss": 0.4932, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.2174757281553398, |
|
"grad_norm": 0.3776053190231323, |
|
"learning_rate": 4.994115224629204e-07, |
|
"loss": 0.513, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.2185852981969487, |
|
"grad_norm": 0.3009076416492462, |
|
"learning_rate": 4.993344570870874e-07, |
|
"loss": 0.4694, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.21969486823855755, |
|
"grad_norm": 0.24924315512180328, |
|
"learning_rate": 4.992526579011823e-07, |
|
"loss": 0.5135, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.22080443828016644, |
|
"grad_norm": 0.39235708117485046, |
|
"learning_rate": 4.991661264582271e-07, |
|
"loss": 0.5608, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.22191400832177532, |
|
"grad_norm": 0.34350383281707764, |
|
"learning_rate": 4.990748644010888e-07, |
|
"loss": 0.5201, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.22302357836338418, |
|
"grad_norm": 0.5332874059677124, |
|
"learning_rate": 4.989788734624492e-07, |
|
"loss": 0.5994, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.22413314840499307, |
|
"grad_norm": 0.32011643052101135, |
|
"learning_rate": 4.988781554647714e-07, |
|
"loss": 0.5103, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.22524271844660193, |
|
"grad_norm": 0.37372103333473206, |
|
"learning_rate": 4.987727123202655e-07, |
|
"loss": 0.5483, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.22635228848821082, |
|
"grad_norm": 0.3511541187763214, |
|
"learning_rate": 4.986625460308524e-07, |
|
"loss": 0.5508, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.2274618585298197, |
|
"grad_norm": 0.33943256735801697, |
|
"learning_rate": 4.985476586881254e-07, |
|
"loss": 0.5437, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 0.5390433669090271, |
|
"learning_rate": 4.984280524733107e-07, |
|
"loss": 0.5161, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.22968099861303745, |
|
"grad_norm": 0.40753117203712463, |
|
"learning_rate": 4.983037296572259e-07, |
|
"loss": 0.4993, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.2307905686546463, |
|
"grad_norm": 0.34956789016723633, |
|
"learning_rate": 4.981746926002372e-07, |
|
"loss": 0.613, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.2319001386962552, |
|
"grad_norm": 0.3175369203090668, |
|
"learning_rate": 4.980409437522143e-07, |
|
"loss": 0.5396, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.23300970873786409, |
|
"grad_norm": 0.30835628509521484, |
|
"learning_rate": 4.979024856524839e-07, |
|
"loss": 0.5407, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.23411927877947294, |
|
"grad_norm": 0.3639371693134308, |
|
"learning_rate": 4.977593209297814e-07, |
|
"loss": 0.5457, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.23522884882108183, |
|
"grad_norm": 0.4240809381008148, |
|
"learning_rate": 4.976114523022015e-07, |
|
"loss": 0.5323, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.23633841886269072, |
|
"grad_norm": 0.509510338306427, |
|
"learning_rate": 4.974588825771457e-07, |
|
"loss": 0.5374, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.23744798890429958, |
|
"grad_norm": 0.3038425147533417, |
|
"learning_rate": 4.9730161465127e-07, |
|
"loss": 0.5041, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.23855755894590847, |
|
"grad_norm": 0.33369964361190796, |
|
"learning_rate": 4.971396515104292e-07, |
|
"loss": 0.601, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.23966712898751732, |
|
"grad_norm": 0.4764558970928192, |
|
"learning_rate": 4.969729962296203e-07, |
|
"loss": 0.6066, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.2407766990291262, |
|
"grad_norm": 0.4532679319381714, |
|
"learning_rate": 4.968016519729246e-07, |
|
"loss": 0.5999, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.2418862690707351, |
|
"grad_norm": 0.39469200372695923, |
|
"learning_rate": 4.966256219934471e-07, |
|
"loss": 0.498, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.24299583911234396, |
|
"grad_norm": 0.4191853702068329, |
|
"learning_rate": 4.964449096332547e-07, |
|
"loss": 0.5246, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.24410540915395285, |
|
"grad_norm": 0.3406555950641632, |
|
"learning_rate": 4.962595183233133e-07, |
|
"loss": 0.6331, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.24521497919556173, |
|
"grad_norm": 0.32338958978652954, |
|
"learning_rate": 4.960694515834224e-07, |
|
"loss": 0.5389, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.2463245492371706, |
|
"grad_norm": 0.8198554515838623, |
|
"learning_rate": 4.958747130221477e-07, |
|
"loss": 0.5678, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.24743411927877948, |
|
"grad_norm": 0.38297727704048157, |
|
"learning_rate": 4.956753063367537e-07, |
|
"loss": 0.4682, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.24854368932038834, |
|
"grad_norm": 0.36951327323913574, |
|
"learning_rate": 4.954712353131323e-07, |
|
"loss": 0.4903, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.24965325936199723, |
|
"grad_norm": 0.30048689246177673, |
|
"learning_rate": 4.952625038257321e-07, |
|
"loss": 0.6061, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2507628294036061, |
|
"grad_norm": 0.2912724018096924, |
|
"learning_rate": 4.950491158374837e-07, |
|
"loss": 0.565, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.251872399445215, |
|
"grad_norm": 0.5042518377304077, |
|
"learning_rate": 4.948310753997254e-07, |
|
"loss": 0.5231, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.25298196948682383, |
|
"grad_norm": 0.31980255246162415, |
|
"learning_rate": 4.94608386652126e-07, |
|
"loss": 0.5077, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.2540915395284327, |
|
"grad_norm": 0.2944648861885071, |
|
"learning_rate": 4.943810538226056e-07, |
|
"loss": 0.4751, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.2552011095700416, |
|
"grad_norm": 0.24331466853618622, |
|
"learning_rate": 4.941490812272563e-07, |
|
"loss": 0.5061, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.2563106796116505, |
|
"grad_norm": 0.308380663394928, |
|
"learning_rate": 4.939124732702595e-07, |
|
"loss": 0.5207, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.2574202496532594, |
|
"grad_norm": 0.3826179802417755, |
|
"learning_rate": 4.936712344438028e-07, |
|
"loss": 0.5081, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.2585298196948682, |
|
"grad_norm": 0.41639629006385803, |
|
"learning_rate": 4.934253693279943e-07, |
|
"loss": 0.5334, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.2596393897364771, |
|
"grad_norm": 0.38280215859413147, |
|
"learning_rate": 4.931748825907759e-07, |
|
"loss": 0.5957, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.260748959778086, |
|
"grad_norm": 0.3500733971595764, |
|
"learning_rate": 4.929197789878347e-07, |
|
"loss": 0.5426, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.2618585298196949, |
|
"grad_norm": 0.4630734324455261, |
|
"learning_rate": 4.926600633625126e-07, |
|
"loss": 0.539, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.26296809986130376, |
|
"grad_norm": 0.3602588474750519, |
|
"learning_rate": 4.92395740645714e-07, |
|
"loss": 0.4219, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.26407766990291265, |
|
"grad_norm": 0.28921574354171753, |
|
"learning_rate": 4.92126815855813e-07, |
|
"loss": 0.5068, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.2651872399445215, |
|
"grad_norm": 0.298486590385437, |
|
"learning_rate": 4.918532940985576e-07, |
|
"loss": 0.5365, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.26629680998613037, |
|
"grad_norm": 0.32068467140197754, |
|
"learning_rate": 4.915751805669725e-07, |
|
"loss": 0.5623, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.26740638002773925, |
|
"grad_norm": 0.2494667023420334, |
|
"learning_rate": 4.912924805412613e-07, |
|
"loss": 0.5911, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.26851595006934814, |
|
"grad_norm": 0.3729526698589325, |
|
"learning_rate": 4.910051993887053e-07, |
|
"loss": 0.6284, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.26962552011095703, |
|
"grad_norm": 0.36454734206199646, |
|
"learning_rate": 4.907133425635625e-07, |
|
"loss": 0.5695, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.27073509015256586, |
|
"grad_norm": 0.3649137318134308, |
|
"learning_rate": 4.904169156069633e-07, |
|
"loss": 0.5287, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.27184466019417475, |
|
"grad_norm": 0.44556987285614014, |
|
"learning_rate": 4.90115924146806e-07, |
|
"loss": 0.5561, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.27295423023578363, |
|
"grad_norm": 0.3902558386325836, |
|
"learning_rate": 4.898103738976491e-07, |
|
"loss": 0.5358, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.2740638002773925, |
|
"grad_norm": 0.38199660181999207, |
|
"learning_rate": 4.895002706606037e-07, |
|
"loss": 0.5221, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.2751733703190014, |
|
"grad_norm": 0.7920161485671997, |
|
"learning_rate": 4.891856203232228e-07, |
|
"loss": 0.552, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.27628294036061024, |
|
"grad_norm": 0.40676286816596985, |
|
"learning_rate": 4.888664288593896e-07, |
|
"loss": 0.563, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.27739251040221913, |
|
"grad_norm": 0.2597495913505554, |
|
"learning_rate": 4.885427023292043e-07, |
|
"loss": 0.5276, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.278502080443828, |
|
"grad_norm": 0.408184289932251, |
|
"learning_rate": 4.882144468788685e-07, |
|
"loss": 0.4505, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.2796116504854369, |
|
"grad_norm": 0.31909486651420593, |
|
"learning_rate": 4.878816687405694e-07, |
|
"loss": 0.5883, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.2807212205270458, |
|
"grad_norm": 0.6439054012298584, |
|
"learning_rate": 4.875443742323607e-07, |
|
"loss": 0.5181, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.2818307905686546, |
|
"grad_norm": 0.3375921845436096, |
|
"learning_rate": 4.872025697580431e-07, |
|
"loss": 0.5607, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.2829403606102635, |
|
"grad_norm": 0.30383211374282837, |
|
"learning_rate": 4.868562618070422e-07, |
|
"loss": 0.517, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.2840499306518724, |
|
"grad_norm": 0.40573009848594666, |
|
"learning_rate": 4.865054569542859e-07, |
|
"loss": 0.5974, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.2851595006934813, |
|
"grad_norm": 0.2704174518585205, |
|
"learning_rate": 4.861501618600794e-07, |
|
"loss": 0.4676, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.28626907073509017, |
|
"grad_norm": 0.30422112345695496, |
|
"learning_rate": 4.857903832699784e-07, |
|
"loss": 0.5631, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.287378640776699, |
|
"grad_norm": 0.48548394441604614, |
|
"learning_rate": 4.854261280146615e-07, |
|
"loss": 0.6646, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.2884882108183079, |
|
"grad_norm": 0.49318018555641174, |
|
"learning_rate": 4.850574030097999e-07, |
|
"loss": 0.5939, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.2895977808599168, |
|
"grad_norm": 0.369650661945343, |
|
"learning_rate": 4.846842152559272e-07, |
|
"loss": 0.5602, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.29070735090152566, |
|
"grad_norm": 0.5085413455963135, |
|
"learning_rate": 4.843065718383051e-07, |
|
"loss": 0.5528, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.29181692094313455, |
|
"grad_norm": 0.30005863308906555, |
|
"learning_rate": 4.839244799267899e-07, |
|
"loss": 0.5668, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.29292649098474344, |
|
"grad_norm": 1.0485389232635498, |
|
"learning_rate": 4.83537946775696e-07, |
|
"loss": 0.5668, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.29403606102635227, |
|
"grad_norm": 0.38798120617866516, |
|
"learning_rate": 4.831469797236582e-07, |
|
"loss": 0.5526, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.29514563106796116, |
|
"grad_norm": 0.42610159516334534, |
|
"learning_rate": 4.827515861934924e-07, |
|
"loss": 0.5549, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.29625520110957004, |
|
"grad_norm": 0.2633446753025055, |
|
"learning_rate": 4.823517736920546e-07, |
|
"loss": 0.5283, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.29736477115117893, |
|
"grad_norm": 0.4579598903656006, |
|
"learning_rate": 4.819475498100985e-07, |
|
"loss": 0.5362, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.2984743411927878, |
|
"grad_norm": 0.39916613698005676, |
|
"learning_rate": 4.815389222221313e-07, |
|
"loss": 0.4562, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.29958391123439665, |
|
"grad_norm": 0.559020459651947, |
|
"learning_rate": 4.81125898686268e-07, |
|
"loss": 0.481, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.30069348127600554, |
|
"grad_norm": 0.40768253803253174, |
|
"learning_rate": 4.80708487044084e-07, |
|
"loss": 0.5346, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.3018030513176144, |
|
"grad_norm": 0.22490708529949188, |
|
"learning_rate": 4.802866952204667e-07, |
|
"loss": 0.5692, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.3029126213592233, |
|
"grad_norm": 0.3096957504749298, |
|
"learning_rate": 4.798605312234643e-07, |
|
"loss": 0.5559, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.3040221914008322, |
|
"grad_norm": 0.3028647303581238, |
|
"learning_rate": 4.794300031441342e-07, |
|
"loss": 0.5313, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.30513176144244103, |
|
"grad_norm": 0.7425801753997803, |
|
"learning_rate": 4.789951191563895e-07, |
|
"loss": 0.4875, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.3062413314840499, |
|
"grad_norm": 0.25657814741134644, |
|
"learning_rate": 4.785558875168434e-07, |
|
"loss": 0.4611, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.3073509015256588, |
|
"grad_norm": 0.374449759721756, |
|
"learning_rate": 4.781123165646529e-07, |
|
"loss": 0.5818, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.3084604715672677, |
|
"grad_norm": 0.3681221008300781, |
|
"learning_rate": 4.776644147213602e-07, |
|
"loss": 0.4757, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.3095700416088766, |
|
"grad_norm": 0.5280266404151917, |
|
"learning_rate": 4.772121904907328e-07, |
|
"loss": 0.4936, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.3106796116504854, |
|
"grad_norm": 0.447544664144516, |
|
"learning_rate": 4.7675565245860195e-07, |
|
"loss": 0.5231, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.3117891816920943, |
|
"grad_norm": 0.33150920271873474, |
|
"learning_rate": 4.7629480929270014e-07, |
|
"loss": 0.5644, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.3128987517337032, |
|
"grad_norm": 0.3428071141242981, |
|
"learning_rate": 4.7582966974249607e-07, |
|
"loss": 0.6091, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.31400832177531207, |
|
"grad_norm": 0.4063955545425415, |
|
"learning_rate": 4.753602426390285e-07, |
|
"loss": 0.5079, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.31511789181692096, |
|
"grad_norm": 0.25844889879226685, |
|
"learning_rate": 4.7488653689473903e-07, |
|
"loss": 0.6156, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.31622746185852985, |
|
"grad_norm": 0.2735048234462738, |
|
"learning_rate": 4.744085615033023e-07, |
|
"loss": 0.5386, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3173370319001387, |
|
"grad_norm": 0.49888360500335693, |
|
"learning_rate": 4.739263255394559e-07, |
|
"loss": 0.5374, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.31844660194174756, |
|
"grad_norm": 0.26587429642677307, |
|
"learning_rate": 4.734398381588274e-07, |
|
"loss": 0.5424, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.31955617198335645, |
|
"grad_norm": 0.28447648882865906, |
|
"learning_rate": 4.7294910859776095e-07, |
|
"loss": 0.5161, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.32066574202496534, |
|
"grad_norm": 0.3417870104312897, |
|
"learning_rate": 4.7245414617314193e-07, |
|
"loss": 0.4308, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.3217753120665742, |
|
"grad_norm": 0.3430401086807251, |
|
"learning_rate": 4.719549602822199e-07, |
|
"loss": 0.5222, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.32288488210818306, |
|
"grad_norm": 0.31506893038749695, |
|
"learning_rate": 4.7145156040243017e-07, |
|
"loss": 0.4937, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.32399445214979194, |
|
"grad_norm": 0.327404648065567, |
|
"learning_rate": 4.709439560912139e-07, |
|
"loss": 0.5163, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.32510402219140083, |
|
"grad_norm": 0.291507750749588, |
|
"learning_rate": 4.704321569858368e-07, |
|
"loss": 0.4774, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.3262135922330097, |
|
"grad_norm": 0.2654714286327362, |
|
"learning_rate": 4.6991617280320614e-07, |
|
"loss": 0.4485, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.3273231622746186, |
|
"grad_norm": 0.38568347692489624, |
|
"learning_rate": 4.6939601333968583e-07, |
|
"loss": 0.5054, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.32843273231622744, |
|
"grad_norm": 0.28820493817329407, |
|
"learning_rate": 4.6887168847091085e-07, |
|
"loss": 0.5271, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.3295423023578363, |
|
"grad_norm": 0.2675837576389313, |
|
"learning_rate": 4.683432081516e-07, |
|
"loss": 0.4915, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.3306518723994452, |
|
"grad_norm": 0.3644208014011383, |
|
"learning_rate": 4.678105824153662e-07, |
|
"loss": 0.5216, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.3317614424410541, |
|
"grad_norm": 0.29700183868408203, |
|
"learning_rate": 4.6727382137452644e-07, |
|
"loss": 0.4904, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.332871012482663, |
|
"grad_norm": 0.25862252712249756, |
|
"learning_rate": 4.6673293521990966e-07, |
|
"loss": 0.516, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3339805825242718, |
|
"grad_norm": 0.31422197818756104, |
|
"learning_rate": 4.661879342206636e-07, |
|
"loss": 0.4196, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.3350901525658807, |
|
"grad_norm": 0.3729874789714813, |
|
"learning_rate": 4.6563882872405924e-07, |
|
"loss": 0.5395, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.3361997226074896, |
|
"grad_norm": 0.7892670035362244, |
|
"learning_rate": 4.650856291552948e-07, |
|
"loss": 0.4989, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.3373092926490985, |
|
"grad_norm": 0.33878469467163086, |
|
"learning_rate": 4.645283460172976e-07, |
|
"loss": 0.5837, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.33841886269070737, |
|
"grad_norm": 0.37014704942703247, |
|
"learning_rate": 4.6396698989052473e-07, |
|
"loss": 0.4183, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.33952843273231625, |
|
"grad_norm": 0.3007761240005493, |
|
"learning_rate": 4.6340157143276233e-07, |
|
"loss": 0.4898, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.3406380027739251, |
|
"grad_norm": 1.216626524925232, |
|
"learning_rate": 4.628321013789228e-07, |
|
"loss": 0.5657, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.341747572815534, |
|
"grad_norm": 0.4057266414165497, |
|
"learning_rate": 4.622585905408414e-07, |
|
"loss": 0.5154, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 0.2979249358177185, |
|
"learning_rate": 4.6168104980707103e-07, |
|
"loss": 0.5022, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.34396671289875175, |
|
"grad_norm": 0.4504467248916626, |
|
"learning_rate": 4.6109949014267494e-07, |
|
"loss": 0.4424, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.34507628294036063, |
|
"grad_norm": 0.32083943486213684, |
|
"learning_rate": 4.605139225890192e-07, |
|
"loss": 0.6416, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.34618585298196947, |
|
"grad_norm": 0.2717413604259491, |
|
"learning_rate": 4.5992435826356286e-07, |
|
"loss": 0.5305, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.34729542302357835, |
|
"grad_norm": 0.27425190806388855, |
|
"learning_rate": 4.593308083596464e-07, |
|
"loss": 0.5205, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.34840499306518724, |
|
"grad_norm": 0.3109380304813385, |
|
"learning_rate": 4.587332841462802e-07, |
|
"loss": 0.4805, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.34951456310679613, |
|
"grad_norm": 0.29059240221977234, |
|
"learning_rate": 4.581317969679296e-07, |
|
"loss": 0.5676, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.350624133148405, |
|
"grad_norm": 0.2471311241388321, |
|
"learning_rate": 4.575263582443e-07, |
|
"loss": 0.5058, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.35173370319001385, |
|
"grad_norm": 0.47161543369293213, |
|
"learning_rate": 4.5691697947012016e-07, |
|
"loss": 0.5187, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.35284327323162273, |
|
"grad_norm": 0.3279658257961273, |
|
"learning_rate": 4.563036722149236e-07, |
|
"loss": 0.504, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.3539528432732316, |
|
"grad_norm": 0.365337610244751, |
|
"learning_rate": 4.556864481228293e-07, |
|
"loss": 0.5314, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.3550624133148405, |
|
"grad_norm": 0.3830949664115906, |
|
"learning_rate": 4.5506531891232036e-07, |
|
"loss": 0.4771, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.3561719833564494, |
|
"grad_norm": 0.3073193430900574, |
|
"learning_rate": 4.5444029637602154e-07, |
|
"loss": 0.6175, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.3572815533980582, |
|
"grad_norm": 0.30972909927368164, |
|
"learning_rate": 4.5381139238047553e-07, |
|
"loss": 0.5965, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.3583911234396671, |
|
"grad_norm": 0.35211724042892456, |
|
"learning_rate": 4.531786188659177e-07, |
|
"loss": 0.4085, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.359500693481276, |
|
"grad_norm": 0.3817974030971527, |
|
"learning_rate": 4.525419878460489e-07, |
|
"loss": 0.4394, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.3606102635228849, |
|
"grad_norm": 0.26062262058258057, |
|
"learning_rate": 4.519015114078082e-07, |
|
"loss": 0.5138, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3617198335644938, |
|
"grad_norm": 0.26502180099487305, |
|
"learning_rate": 4.5125720171114265e-07, |
|
"loss": 0.5103, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.36282940360610266, |
|
"grad_norm": 0.3360140025615692, |
|
"learning_rate": 4.506090709887767e-07, |
|
"loss": 0.5787, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.3639389736477115, |
|
"grad_norm": 0.4174387454986572, |
|
"learning_rate": 4.4995713154598014e-07, |
|
"loss": 0.5905, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.3650485436893204, |
|
"grad_norm": 0.30797651410102844, |
|
"learning_rate": 4.493013957603342e-07, |
|
"loss": 0.5341, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.36615811373092927, |
|
"grad_norm": 0.29396864771842957, |
|
"learning_rate": 4.4864187608149664e-07, |
|
"loss": 0.5531, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.36726768377253816, |
|
"grad_norm": 0.4340313971042633, |
|
"learning_rate": 4.4797858503096553e-07, |
|
"loss": 0.5408, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.36837725381414704, |
|
"grad_norm": 0.33175596594810486, |
|
"learning_rate": 4.473115352018412e-07, |
|
"loss": 0.5338, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.3694868238557559, |
|
"grad_norm": 0.2598438262939453, |
|
"learning_rate": 4.4664073925858737e-07, |
|
"loss": 0.4943, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.37059639389736476, |
|
"grad_norm": 0.42363619804382324, |
|
"learning_rate": 4.459662099367908e-07, |
|
"loss": 0.5188, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.37170596393897365, |
|
"grad_norm": 0.30839937925338745, |
|
"learning_rate": 4.4528796004291937e-07, |
|
"loss": 0.551, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.37281553398058254, |
|
"grad_norm": 0.39423060417175293, |
|
"learning_rate": 4.4460600245407876e-07, |
|
"loss": 0.5298, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3739251040221914, |
|
"grad_norm": 0.2767972946166992, |
|
"learning_rate": 4.439203501177683e-07, |
|
"loss": 0.4744, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.37503467406380026, |
|
"grad_norm": 0.3349682092666626, |
|
"learning_rate": 4.432310160516348e-07, |
|
"loss": 0.6472, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.37614424410540914, |
|
"grad_norm": 0.2644417881965637, |
|
"learning_rate": 4.42538013343226e-07, |
|
"loss": 0.4105, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.37725381414701803, |
|
"grad_norm": 0.5239447951316833, |
|
"learning_rate": 4.4184135514974117e-07, |
|
"loss": 0.5414, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.3783633841886269, |
|
"grad_norm": 0.363673597574234, |
|
"learning_rate": 4.411410546977823e-07, |
|
"loss": 0.6091, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.3794729542302358, |
|
"grad_norm": 0.39377179741859436, |
|
"learning_rate": 4.4043712528310217e-07, |
|
"loss": 0.4794, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.38058252427184464, |
|
"grad_norm": 0.3427687883377075, |
|
"learning_rate": 4.397295802703523e-07, |
|
"loss": 0.532, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.3816920943134535, |
|
"grad_norm": 0.3745235502719879, |
|
"learning_rate": 4.390184330928295e-07, |
|
"loss": 0.5059, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.3828016643550624, |
|
"grad_norm": 0.38224560022354126, |
|
"learning_rate": 4.3830369725222017e-07, |
|
"loss": 0.6141, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.3839112343966713, |
|
"grad_norm": 0.4920729696750641, |
|
"learning_rate": 4.375853863183443e-07, |
|
"loss": 0.5215, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.3850208044382802, |
|
"grad_norm": 0.3441776633262634, |
|
"learning_rate": 4.3686351392889793e-07, |
|
"loss": 0.4538, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.386130374479889, |
|
"grad_norm": 0.3994586765766144, |
|
"learning_rate": 4.361380937891942e-07, |
|
"loss": 0.4517, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.3872399445214979, |
|
"grad_norm": 0.38988494873046875, |
|
"learning_rate": 4.3540913967190286e-07, |
|
"loss": 0.4544, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.3883495145631068, |
|
"grad_norm": 0.31946954131126404, |
|
"learning_rate": 4.346766654167893e-07, |
|
"loss": 0.4662, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3894590846047157, |
|
"grad_norm": 0.336331844329834, |
|
"learning_rate": 4.33940684930451e-07, |
|
"loss": 0.4657, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.39056865464632456, |
|
"grad_norm": 0.35121986269950867, |
|
"learning_rate": 4.3320121218605454e-07, |
|
"loss": 0.4843, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.39167822468793345, |
|
"grad_norm": 0.32553473114967346, |
|
"learning_rate": 4.324582612230694e-07, |
|
"loss": 0.5287, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.3927877947295423, |
|
"grad_norm": 0.2604405879974365, |
|
"learning_rate": 4.3171184614700185e-07, |
|
"loss": 0.5274, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.39389736477115117, |
|
"grad_norm": 0.2987576723098755, |
|
"learning_rate": 4.309619811291271e-07, |
|
"loss": 0.4328, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.39500693481276006, |
|
"grad_norm": 0.3672533631324768, |
|
"learning_rate": 4.3020868040622023e-07, |
|
"loss": 0.5229, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.39611650485436894, |
|
"grad_norm": 0.46902307868003845, |
|
"learning_rate": 4.294519582802857e-07, |
|
"loss": 0.5167, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.39722607489597783, |
|
"grad_norm": 0.5896915793418884, |
|
"learning_rate": 4.2869182911828627e-07, |
|
"loss": 0.5236, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.39833564493758666, |
|
"grad_norm": 0.31331950426101685, |
|
"learning_rate": 4.2792830735186976e-07, |
|
"loss": 0.58, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.39944521497919555, |
|
"grad_norm": 0.4323517382144928, |
|
"learning_rate": 4.2716140747709516e-07, |
|
"loss": 0.4798, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.40055478502080444, |
|
"grad_norm": 0.24910062551498413, |
|
"learning_rate": 4.2639114405415777e-07, |
|
"loss": 0.5023, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.4016643550624133, |
|
"grad_norm": 0.3129631578922272, |
|
"learning_rate": 4.256175317071122e-07, |
|
"loss": 0.5323, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.4027739251040222, |
|
"grad_norm": 0.30831965804100037, |
|
"learning_rate": 4.248405851235952e-07, |
|
"loss": 0.6024, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.40388349514563104, |
|
"grad_norm": 0.2090596854686737, |
|
"learning_rate": 4.2406031905454664e-07, |
|
"loss": 0.5647, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.40499306518723993, |
|
"grad_norm": 0.3623282015323639, |
|
"learning_rate": 4.2327674831392923e-07, |
|
"loss": 0.4927, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.4061026352288488, |
|
"grad_norm": 0.2708721458911896, |
|
"learning_rate": 4.2248988777844756e-07, |
|
"loss": 0.4644, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.4072122052704577, |
|
"grad_norm": 0.3101958930492401, |
|
"learning_rate": 4.216997523872656e-07, |
|
"loss": 0.4676, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.4083217753120666, |
|
"grad_norm": 0.34548845887184143, |
|
"learning_rate": 4.2090635714172295e-07, |
|
"loss": 0.4972, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.4094313453536754, |
|
"grad_norm": 0.2833360433578491, |
|
"learning_rate": 4.2010971710505024e-07, |
|
"loss": 0.5035, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.4105409153952843, |
|
"grad_norm": 0.4793650805950165, |
|
"learning_rate": 4.1930984740208277e-07, |
|
"loss": 0.5244, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.4116504854368932, |
|
"grad_norm": 0.25977978110313416, |
|
"learning_rate": 4.185067632189737e-07, |
|
"loss": 0.4568, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.4127600554785021, |
|
"grad_norm": 0.6289175152778625, |
|
"learning_rate": 4.177004798029058e-07, |
|
"loss": 0.4981, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.413869625520111, |
|
"grad_norm": 0.33281320333480835, |
|
"learning_rate": 4.1689101246180134e-07, |
|
"loss": 0.5826, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.41497919556171986, |
|
"grad_norm": 0.39740175008773804, |
|
"learning_rate": 4.1607837656403245e-07, |
|
"loss": 0.4544, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.4160887656033287, |
|
"grad_norm": 0.24023501574993134, |
|
"learning_rate": 4.1526258753812833e-07, |
|
"loss": 0.5676, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4171983356449376, |
|
"grad_norm": 0.3031497001647949, |
|
"learning_rate": 4.1444366087248304e-07, |
|
"loss": 0.4852, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.41830790568654647, |
|
"grad_norm": 1.512039303779602, |
|
"learning_rate": 4.136216121150611e-07, |
|
"loss": 0.6114, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.41941747572815535, |
|
"grad_norm": 0.3467373549938202, |
|
"learning_rate": 4.1279645687310245e-07, |
|
"loss": 0.4715, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.42052704576976424, |
|
"grad_norm": 0.2934076189994812, |
|
"learning_rate": 4.11968210812826e-07, |
|
"loss": 0.4977, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.42163661581137307, |
|
"grad_norm": 0.3921898305416107, |
|
"learning_rate": 4.111368896591323e-07, |
|
"loss": 0.5787, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.42274618585298196, |
|
"grad_norm": 1.2903082370758057, |
|
"learning_rate": 4.10302509195305e-07, |
|
"loss": 0.4826, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.42385575589459085, |
|
"grad_norm": 0.30528688430786133, |
|
"learning_rate": 4.0946508526271107e-07, |
|
"loss": 0.5653, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.42496532593619973, |
|
"grad_norm": 0.35863542556762695, |
|
"learning_rate": 4.086246337605002e-07, |
|
"loss": 0.4821, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.4260748959778086, |
|
"grad_norm": 0.28547871112823486, |
|
"learning_rate": 4.077811706453028e-07, |
|
"loss": 0.4127, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.42718446601941745, |
|
"grad_norm": 0.489197313785553, |
|
"learning_rate": 4.069347119309271e-07, |
|
"loss": 0.5363, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.42829403606102634, |
|
"grad_norm": 0.2887154221534729, |
|
"learning_rate": 4.060852736880553e-07, |
|
"loss": 0.5618, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.4294036061026352, |
|
"grad_norm": 0.2965123951435089, |
|
"learning_rate": 4.0523287204393795e-07, |
|
"loss": 0.4854, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.4305131761442441, |
|
"grad_norm": 0.326860636472702, |
|
"learning_rate": 4.0437752318208846e-07, |
|
"loss": 0.4852, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.431622746185853, |
|
"grad_norm": 0.33304309844970703, |
|
"learning_rate": 4.0351924334197516e-07, |
|
"loss": 0.4727, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.43273231622746183, |
|
"grad_norm": 0.26137974858283997, |
|
"learning_rate": 4.0265804881871366e-07, |
|
"loss": 0.536, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.4338418862690707, |
|
"grad_norm": 0.543980062007904, |
|
"learning_rate": 4.0179395596275665e-07, |
|
"loss": 0.5694, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.4349514563106796, |
|
"grad_norm": 0.3294641971588135, |
|
"learning_rate": 4.0092698117958447e-07, |
|
"loss": 0.5928, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.4360610263522885, |
|
"grad_norm": 0.3174699544906616, |
|
"learning_rate": 4.0005714092939255e-07, |
|
"loss": 0.5022, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.4371705963938974, |
|
"grad_norm": 0.3462190628051758, |
|
"learning_rate": 3.9918445172677995e-07, |
|
"loss": 0.4936, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.43828016643550627, |
|
"grad_norm": 0.2537282407283783, |
|
"learning_rate": 3.983089301404351e-07, |
|
"loss": 0.6196, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.4393897364771151, |
|
"grad_norm": 0.4872954189777374, |
|
"learning_rate": 3.9743059279282126e-07, |
|
"loss": 0.46, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.440499306518724, |
|
"grad_norm": 0.3006153702735901, |
|
"learning_rate": 3.9654945635986155e-07, |
|
"loss": 0.5201, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.4416088765603329, |
|
"grad_norm": 0.28118792176246643, |
|
"learning_rate": 3.9566553757062154e-07, |
|
"loss": 0.548, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.44271844660194176, |
|
"grad_norm": 0.4475691616535187, |
|
"learning_rate": 3.947788532069923e-07, |
|
"loss": 0.498, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.44382801664355065, |
|
"grad_norm": 0.39291325211524963, |
|
"learning_rate": 3.938894201033713e-07, |
|
"loss": 0.4702, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4449375866851595, |
|
"grad_norm": 0.3244176506996155, |
|
"learning_rate": 3.929972551463431e-07, |
|
"loss": 0.4421, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.44604715672676837, |
|
"grad_norm": 0.3121548295021057, |
|
"learning_rate": 3.9210237527435864e-07, |
|
"loss": 0.5523, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.44715672676837726, |
|
"grad_norm": 0.33265426754951477, |
|
"learning_rate": 3.9120479747741344e-07, |
|
"loss": 0.5159, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.44826629680998614, |
|
"grad_norm": 0.298480361700058, |
|
"learning_rate": 3.903045387967256e-07, |
|
"loss": 0.4688, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.44937586685159503, |
|
"grad_norm": 0.2703563868999481, |
|
"learning_rate": 3.8940161632441157e-07, |
|
"loss": 0.4673, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.45048543689320386, |
|
"grad_norm": 0.4872739911079407, |
|
"learning_rate": 3.884960472031622e-07, |
|
"loss": 0.5151, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.45159500693481275, |
|
"grad_norm": 0.4316668212413788, |
|
"learning_rate": 3.87587848625917e-07, |
|
"loss": 0.5357, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.45270457697642164, |
|
"grad_norm": 0.2701937258243561, |
|
"learning_rate": 3.866770378355375e-07, |
|
"loss": 0.4859, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.4538141470180305, |
|
"grad_norm": 0.2920953035354614, |
|
"learning_rate": 3.8576363212448057e-07, |
|
"loss": 0.5778, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.4549237170596394, |
|
"grad_norm": 0.4294867217540741, |
|
"learning_rate": 3.8484764883446944e-07, |
|
"loss": 0.5387, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.45603328710124824, |
|
"grad_norm": 0.42127662897109985, |
|
"learning_rate": 3.8392910535616476e-07, |
|
"loss": 0.5998, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 0.3392697870731354, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"loss": 0.4547, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.458252427184466, |
|
"grad_norm": 0.2972329258918762, |
|
"learning_rate": 3.820844076400216e-07, |
|
"loss": 0.5605, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.4593619972260749, |
|
"grad_norm": 0.3029995560646057, |
|
"learning_rate": 3.8115828842521514e-07, |
|
"loss": 0.5124, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.4604715672676838, |
|
"grad_norm": 0.3053463101387024, |
|
"learning_rate": 3.802296790675137e-07, |
|
"loss": 0.5181, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.4615811373092926, |
|
"grad_norm": 0.4185832738876343, |
|
"learning_rate": 3.7929859719729394e-07, |
|
"loss": 0.5177, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.4626907073509015, |
|
"grad_norm": 0.3226168155670166, |
|
"learning_rate": 3.783650604918746e-07, |
|
"loss": 0.5008, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.4638002773925104, |
|
"grad_norm": 0.3534069359302521, |
|
"learning_rate": 3.7742908667518175e-07, |
|
"loss": 0.529, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.4649098474341193, |
|
"grad_norm": 0.37676820158958435, |
|
"learning_rate": 3.7649069351741185e-07, |
|
"loss": 0.5128, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.46601941747572817, |
|
"grad_norm": 0.30804529786109924, |
|
"learning_rate": 3.755498988346945e-07, |
|
"loss": 0.5262, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.46712898751733706, |
|
"grad_norm": 0.3896881639957428, |
|
"learning_rate": 3.746067204887538e-07, |
|
"loss": 0.4655, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.4682385575589459, |
|
"grad_norm": 0.3813321888446808, |
|
"learning_rate": 3.7366117638657e-07, |
|
"loss": 0.4867, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.4693481276005548, |
|
"grad_norm": 0.3187031149864197, |
|
"learning_rate": 3.72713284480039e-07, |
|
"loss": 0.4468, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.47045769764216366, |
|
"grad_norm": 0.3183457851409912, |
|
"learning_rate": 3.7176306276563126e-07, |
|
"loss": 0.491, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.47156726768377255, |
|
"grad_norm": 0.58977210521698, |
|
"learning_rate": 3.708105292840509e-07, |
|
"loss": 0.5106, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.47267683772538144, |
|
"grad_norm": 0.3110608756542206, |
|
"learning_rate": 3.698557021198925e-07, |
|
"loss": 0.4688, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.47378640776699027, |
|
"grad_norm": 0.23853054642677307, |
|
"learning_rate": 3.6889859940129814e-07, |
|
"loss": 0.5505, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.47489597780859916, |
|
"grad_norm": 0.24323715269565582, |
|
"learning_rate": 3.6793923929961296e-07, |
|
"loss": 0.4357, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.47600554785020804, |
|
"grad_norm": 0.39231613278388977, |
|
"learning_rate": 3.669776400290403e-07, |
|
"loss": 0.5529, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.47711511789181693, |
|
"grad_norm": 0.3708134889602661, |
|
"learning_rate": 3.66013819846296e-07, |
|
"loss": 0.6198, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.4782246879334258, |
|
"grad_norm": 0.2724001109600067, |
|
"learning_rate": 3.6504779705026156e-07, |
|
"loss": 0.4517, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.47933425797503465, |
|
"grad_norm": 0.3848133683204651, |
|
"learning_rate": 3.6407958998163687e-07, |
|
"loss": 0.5878, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.48044382801664354, |
|
"grad_norm": 0.3788512945175171, |
|
"learning_rate": 3.6310921702259184e-07, |
|
"loss": 0.4963, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.4815533980582524, |
|
"grad_norm": 0.45098355412483215, |
|
"learning_rate": 3.6213669659641757e-07, |
|
"loss": 0.4629, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.4826629680998613, |
|
"grad_norm": 0.5497767329216003, |
|
"learning_rate": 3.611620471671766e-07, |
|
"loss": 0.5813, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.4837725381414702, |
|
"grad_norm": 0.3716546297073364, |
|
"learning_rate": 3.6018528723935214e-07, |
|
"loss": 0.498, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.48488210818307903, |
|
"grad_norm": 0.323794960975647, |
|
"learning_rate": 3.5920643535749696e-07, |
|
"loss": 0.4899, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.4859916782246879, |
|
"grad_norm": 0.2931385338306427, |
|
"learning_rate": 3.582255101058811e-07, |
|
"loss": 0.56, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.4871012482662968, |
|
"grad_norm": 0.4833517372608185, |
|
"learning_rate": 3.572425301081392e-07, |
|
"loss": 0.5229, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.4882108183079057, |
|
"grad_norm": 0.3123915195465088, |
|
"learning_rate": 3.5625751402691693e-07, |
|
"loss": 0.5081, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.4893203883495146, |
|
"grad_norm": 0.24910438060760498, |
|
"learning_rate": 3.5527048056351654e-07, |
|
"loss": 0.5406, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.49042995839112347, |
|
"grad_norm": 0.4142923355102539, |
|
"learning_rate": 3.542814484575419e-07, |
|
"loss": 0.4703, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.4915395284327323, |
|
"grad_norm": 0.34447526931762695, |
|
"learning_rate": 3.532904364865426e-07, |
|
"loss": 0.4803, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.4926490984743412, |
|
"grad_norm": 0.8663418292999268, |
|
"learning_rate": 3.522974634656576e-07, |
|
"loss": 0.4718, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.49375866851595007, |
|
"grad_norm": 0.5423429012298584, |
|
"learning_rate": 3.5130254824725787e-07, |
|
"loss": 0.4895, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.49486823855755896, |
|
"grad_norm": 0.4453887641429901, |
|
"learning_rate": 3.503057097205885e-07, |
|
"loss": 0.5677, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.49597780859916785, |
|
"grad_norm": 0.41120442748069763, |
|
"learning_rate": 3.4930696681141034e-07, |
|
"loss": 0.5781, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.4970873786407767, |
|
"grad_norm": 0.38804373145103455, |
|
"learning_rate": 3.4830633848164006e-07, |
|
"loss": 0.5453, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.49819694868238557, |
|
"grad_norm": 0.4749200940132141, |
|
"learning_rate": 3.473038437289907e-07, |
|
"loss": 0.516, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.49930651872399445, |
|
"grad_norm": 0.34202054142951965, |
|
"learning_rate": 3.462995015866109e-07, |
|
"loss": 0.6462, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5004160887656033, |
|
"grad_norm": 0.29288092255592346, |
|
"learning_rate": 3.452933311227232e-07, |
|
"loss": 0.4496, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.5015256588072122, |
|
"grad_norm": 0.4564000070095062, |
|
"learning_rate": 3.442853514402626e-07, |
|
"loss": 0.5254, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.5026352288488211, |
|
"grad_norm": 0.4368923604488373, |
|
"learning_rate": 3.432755816765131e-07, |
|
"loss": 0.3723, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.50374479889043, |
|
"grad_norm": 0.42233774065971375, |
|
"learning_rate": 3.422640410027451e-07, |
|
"loss": 0.4816, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.5048543689320388, |
|
"grad_norm": 0.5136800408363342, |
|
"learning_rate": 3.412507486238512e-07, |
|
"loss": 0.48, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.5059639389736477, |
|
"grad_norm": 0.6266002655029297, |
|
"learning_rate": 3.4023572377798116e-07, |
|
"loss": 0.4605, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.5070735090152566, |
|
"grad_norm": 0.38814643025398254, |
|
"learning_rate": 3.3921898573617715e-07, |
|
"loss": 0.4482, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.5081830790568654, |
|
"grad_norm": 0.3517705202102661, |
|
"learning_rate": 3.382005538020078e-07, |
|
"loss": 0.4514, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.5092926490984744, |
|
"grad_norm": 0.25241366028785706, |
|
"learning_rate": 3.371804473112014e-07, |
|
"loss": 0.5004, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.5104022191400832, |
|
"grad_norm": 0.5189043283462524, |
|
"learning_rate": 3.3615868563127937e-07, |
|
"loss": 0.5339, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.511511789181692, |
|
"grad_norm": 0.42029428482055664, |
|
"learning_rate": 3.3513528816118775e-07, |
|
"loss": 0.551, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.512621359223301, |
|
"grad_norm": 0.5070712566375732, |
|
"learning_rate": 3.341102743309296e-07, |
|
"loss": 0.5013, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.5137309292649098, |
|
"grad_norm": 0.2703372836112976, |
|
"learning_rate": 3.3308366360119584e-07, |
|
"loss": 0.5446, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.5148404993065188, |
|
"grad_norm": 0.39520278573036194, |
|
"learning_rate": 3.3205547546299575e-07, |
|
"loss": 0.5542, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.5159500693481276, |
|
"grad_norm": 0.4130886197090149, |
|
"learning_rate": 3.3102572943728673e-07, |
|
"loss": 0.4348, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.5170596393897364, |
|
"grad_norm": 0.30212274193763733, |
|
"learning_rate": 3.2999444507460437e-07, |
|
"loss": 0.4626, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.5181692094313454, |
|
"grad_norm": 0.24097274243831635, |
|
"learning_rate": 3.2896164195469033e-07, |
|
"loss": 0.5347, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.5192787794729542, |
|
"grad_norm": 0.2773560583591461, |
|
"learning_rate": 3.279273396861214e-07, |
|
"loss": 0.6156, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.5203883495145631, |
|
"grad_norm": 0.4777432382106781, |
|
"learning_rate": 3.268915579059366e-07, |
|
"loss": 0.4348, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.521497919556172, |
|
"grad_norm": 0.30696791410446167, |
|
"learning_rate": 3.2585431627926476e-07, |
|
"loss": 0.6035, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.5226074895977808, |
|
"grad_norm": 0.2942405343055725, |
|
"learning_rate": 3.248156344989512e-07, |
|
"loss": 0.4254, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.5237170596393897, |
|
"grad_norm": 0.498976469039917, |
|
"learning_rate": 3.237755322851834e-07, |
|
"loss": 0.5464, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.5248266296809986, |
|
"grad_norm": 0.5058267712593079, |
|
"learning_rate": 3.2273402938511706e-07, |
|
"loss": 0.5168, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.5259361997226075, |
|
"grad_norm": 0.164671391248703, |
|
"learning_rate": 3.2169114557250103e-07, |
|
"loss": 0.5291, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.5270457697642164, |
|
"grad_norm": 0.45560675859451294, |
|
"learning_rate": 3.206469006473017e-07, |
|
"loss": 0.5577, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5281553398058253, |
|
"grad_norm": 0.4424368441104889, |
|
"learning_rate": 3.196013144353274e-07, |
|
"loss": 0.4828, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.5292649098474341, |
|
"grad_norm": 0.4512025713920593, |
|
"learning_rate": 3.185544067878518e-07, |
|
"loss": 0.5543, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.530374479889043, |
|
"grad_norm": 0.30722030997276306, |
|
"learning_rate": 3.175061975812371e-07, |
|
"loss": 0.5418, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.5314840499306519, |
|
"grad_norm": 0.38087835907936096, |
|
"learning_rate": 3.1645670671655645e-07, |
|
"loss": 0.4493, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.5325936199722607, |
|
"grad_norm": 0.3499601483345032, |
|
"learning_rate": 3.154059541192164e-07, |
|
"loss": 0.5878, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.5337031900138697, |
|
"grad_norm": 0.4532395601272583, |
|
"learning_rate": 3.1435395973857876e-07, |
|
"loss": 0.5829, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.5348127600554785, |
|
"grad_norm": 0.30889692902565, |
|
"learning_rate": 3.1330074354758094e-07, |
|
"loss": 0.5797, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.5359223300970873, |
|
"grad_norm": 0.4182281494140625, |
|
"learning_rate": 3.12246325542358e-07, |
|
"loss": 0.5118, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.5370319001386963, |
|
"grad_norm": 0.35436221957206726, |
|
"learning_rate": 3.11190725741862e-07, |
|
"loss": 0.6517, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.5381414701803051, |
|
"grad_norm": 0.33836600184440613, |
|
"learning_rate": 3.1013396418748234e-07, |
|
"loss": 0.4423, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.5392510402219141, |
|
"grad_norm": 0.34394633769989014, |
|
"learning_rate": 3.090760609426655e-07, |
|
"loss": 0.4322, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.5403606102635229, |
|
"grad_norm": 0.3953171670436859, |
|
"learning_rate": 3.080170360925336e-07, |
|
"loss": 0.443, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.5414701803051317, |
|
"grad_norm": 0.27416878938674927, |
|
"learning_rate": 3.069569097435033e-07, |
|
"loss": 0.4737, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.5425797503467407, |
|
"grad_norm": 0.325531005859375, |
|
"learning_rate": 3.0589570202290433e-07, |
|
"loss": 0.4698, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.5436893203883495, |
|
"grad_norm": 0.44285646080970764, |
|
"learning_rate": 3.0483343307859663e-07, |
|
"loss": 0.4886, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.5447988904299584, |
|
"grad_norm": 0.2852041721343994, |
|
"learning_rate": 3.0377012307858904e-07, |
|
"loss": 0.5289, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.5459084604715673, |
|
"grad_norm": 0.3136042356491089, |
|
"learning_rate": 3.027057922106549e-07, |
|
"loss": 0.494, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.5470180305131761, |
|
"grad_norm": 0.3801943361759186, |
|
"learning_rate": 3.0164046068195e-07, |
|
"loss": 0.4818, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.548127600554785, |
|
"grad_norm": 0.3817514479160309, |
|
"learning_rate": 3.0057414871862816e-07, |
|
"loss": 0.5448, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.5492371705963939, |
|
"grad_norm": 0.3009171485900879, |
|
"learning_rate": 2.9950687656545787e-07, |
|
"loss": 0.4765, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.5503467406380028, |
|
"grad_norm": 0.4833567440509796, |
|
"learning_rate": 2.9843866448543727e-07, |
|
"loss": 0.5342, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.5514563106796116, |
|
"grad_norm": 0.4005512297153473, |
|
"learning_rate": 2.973695327594099e-07, |
|
"loss": 0.4766, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.5525658807212205, |
|
"grad_norm": 0.33436936140060425, |
|
"learning_rate": 2.9629950168567954e-07, |
|
"loss": 0.4826, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.5536754507628294, |
|
"grad_norm": 0.33340537548065186, |
|
"learning_rate": 2.9522859157962454e-07, |
|
"loss": 0.5473, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.5547850208044383, |
|
"grad_norm": 0.33686235547065735, |
|
"learning_rate": 2.9415682277331265e-07, |
|
"loss": 0.5534, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5558945908460472, |
|
"grad_norm": 0.3106544315814972, |
|
"learning_rate": 2.930842156151146e-07, |
|
"loss": 0.497, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.557004160887656, |
|
"grad_norm": 0.428392231464386, |
|
"learning_rate": 2.920107904693178e-07, |
|
"loss": 0.483, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.5581137309292649, |
|
"grad_norm": 0.3546343743801117, |
|
"learning_rate": 2.9093656771574006e-07, |
|
"loss": 0.4438, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.5592233009708738, |
|
"grad_norm": 0.5527733564376831, |
|
"learning_rate": 2.8986156774934204e-07, |
|
"loss": 0.5118, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.5603328710124826, |
|
"grad_norm": 0.33526676893234253, |
|
"learning_rate": 2.8878581097984075e-07, |
|
"loss": 0.565, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.5614424410540916, |
|
"grad_norm": 0.4417477548122406, |
|
"learning_rate": 2.877093178313214e-07, |
|
"loss": 0.4793, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.5625520110957004, |
|
"grad_norm": 0.2999446392059326, |
|
"learning_rate": 2.8663210874185013e-07, |
|
"loss": 0.5449, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.5636615811373092, |
|
"grad_norm": 0.38563647866249084, |
|
"learning_rate": 2.8555420416308573e-07, |
|
"loss": 0.5037, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.5647711511789182, |
|
"grad_norm": 0.8459754586219788, |
|
"learning_rate": 2.8447562455989134e-07, |
|
"loss": 0.5474, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.565880721220527, |
|
"grad_norm": 0.3799205720424652, |
|
"learning_rate": 2.8339639040994604e-07, |
|
"loss": 0.609, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.566990291262136, |
|
"grad_norm": 0.51907879114151, |
|
"learning_rate": 2.8231652220335603e-07, |
|
"loss": 0.4621, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.5680998613037448, |
|
"grad_norm": 0.3080946207046509, |
|
"learning_rate": 2.812360404422653e-07, |
|
"loss": 0.5304, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.5692094313453536, |
|
"grad_norm": 0.35099002718925476, |
|
"learning_rate": 2.80154965640467e-07, |
|
"loss": 0.5226, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.5703190013869626, |
|
"grad_norm": 0.3252660036087036, |
|
"learning_rate": 2.790733183230136e-07, |
|
"loss": 0.4481, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.3965422511100769, |
|
"learning_rate": 2.7799111902582693e-07, |
|
"loss": 0.5616, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.5725381414701803, |
|
"grad_norm": 0.4213715195655823, |
|
"learning_rate": 2.7690838829530886e-07, |
|
"loss": 0.4895, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.5736477115117892, |
|
"grad_norm": 0.27551645040512085, |
|
"learning_rate": 2.758251466879508e-07, |
|
"loss": 0.532, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.574757281553398, |
|
"grad_norm": 0.6193405389785767, |
|
"learning_rate": 2.7474141476994366e-07, |
|
"loss": 0.5517, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.575866851595007, |
|
"grad_norm": 0.4643997251987457, |
|
"learning_rate": 2.736572131167872e-07, |
|
"loss": 0.6117, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.5769764216366158, |
|
"grad_norm": 0.3855043351650238, |
|
"learning_rate": 2.725725623128994e-07, |
|
"loss": 0.4358, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.5780859916782247, |
|
"grad_norm": 0.2621045410633087, |
|
"learning_rate": 2.71487482951226e-07, |
|
"loss": 0.5391, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.5791955617198336, |
|
"grad_norm": 0.4209457337856293, |
|
"learning_rate": 2.7040199563284894e-07, |
|
"loss": 0.5641, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.5803051317614425, |
|
"grad_norm": 0.3612224757671356, |
|
"learning_rate": 2.6931612096659566e-07, |
|
"loss": 0.5176, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.5814147018030513, |
|
"grad_norm": 0.30091843008995056, |
|
"learning_rate": 2.682298795686478e-07, |
|
"loss": 0.4728, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.5825242718446602, |
|
"grad_norm": 0.33328065276145935, |
|
"learning_rate": 2.671432920621495e-07, |
|
"loss": 0.5307, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.5836338418862691, |
|
"grad_norm": 0.2998218536376953, |
|
"learning_rate": 2.6605637907681613e-07, |
|
"loss": 0.5042, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.5847434119278779, |
|
"grad_norm": 0.576810896396637, |
|
"learning_rate": 2.6496916124854244e-07, |
|
"loss": 0.5064, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.5858529819694869, |
|
"grad_norm": 0.40196332335472107, |
|
"learning_rate": 2.638816592190112e-07, |
|
"loss": 0.5932, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.5869625520110957, |
|
"grad_norm": 0.524108350276947, |
|
"learning_rate": 2.627938936353006e-07, |
|
"loss": 0.5463, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.5880721220527045, |
|
"grad_norm": 0.45371681451797485, |
|
"learning_rate": 2.617058851494927e-07, |
|
"loss": 0.5356, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.5891816920943135, |
|
"grad_norm": 0.3213278651237488, |
|
"learning_rate": 2.606176544182813e-07, |
|
"loss": 0.5075, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.5902912621359223, |
|
"grad_norm": 0.34761932492256165, |
|
"learning_rate": 2.5952922210257964e-07, |
|
"loss": 0.5104, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.5914008321775313, |
|
"grad_norm": 0.32400938868522644, |
|
"learning_rate": 2.584406088671284e-07, |
|
"loss": 0.4889, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.5925104022191401, |
|
"grad_norm": 0.30970096588134766, |
|
"learning_rate": 2.573518353801028e-07, |
|
"loss": 0.6171, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.5936199722607489, |
|
"grad_norm": 0.3451375365257263, |
|
"learning_rate": 2.5626292231272086e-07, |
|
"loss": 0.4881, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.5947295423023579, |
|
"grad_norm": 0.41977164149284363, |
|
"learning_rate": 2.5517389033885056e-07, |
|
"loss": 0.5399, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.5958391123439667, |
|
"grad_norm": 0.3118828535079956, |
|
"learning_rate": 2.540847601346173e-07, |
|
"loss": 0.4543, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.5969486823855756, |
|
"grad_norm": 0.36801040172576904, |
|
"learning_rate": 2.5299555237801176e-07, |
|
"loss": 0.4706, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.5980582524271845, |
|
"grad_norm": 0.3566454350948334, |
|
"learning_rate": 2.5190628774849667e-07, |
|
"loss": 0.5271, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.5991678224687933, |
|
"grad_norm": 0.436729371547699, |
|
"learning_rate": 2.5081698692661475e-07, |
|
"loss": 0.5308, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.6002773925104022, |
|
"grad_norm": 0.39320242404937744, |
|
"learning_rate": 2.497276705935957e-07, |
|
"loss": 0.5804, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.6013869625520111, |
|
"grad_norm": 0.2793132960796356, |
|
"learning_rate": 2.4863835943096386e-07, |
|
"loss": 0.4361, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.60249653259362, |
|
"grad_norm": 0.31615859270095825, |
|
"learning_rate": 2.4754907412014526e-07, |
|
"loss": 0.468, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.6036061026352288, |
|
"grad_norm": 0.27115607261657715, |
|
"learning_rate": 2.464598353420754e-07, |
|
"loss": 0.4934, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.6047156726768377, |
|
"grad_norm": 0.41032761335372925, |
|
"learning_rate": 2.45370663776806e-07, |
|
"loss": 0.6101, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.6058252427184466, |
|
"grad_norm": 0.3607085943222046, |
|
"learning_rate": 2.442815801031128e-07, |
|
"loss": 0.4863, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.6069348127600555, |
|
"grad_norm": 0.3773590624332428, |
|
"learning_rate": 2.431926049981029e-07, |
|
"loss": 0.4498, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.6080443828016644, |
|
"grad_norm": 0.3329433500766754, |
|
"learning_rate": 2.4210375913682203e-07, |
|
"loss": 0.5377, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.6091539528432732, |
|
"grad_norm": 0.3469200134277344, |
|
"learning_rate": 2.4101506319186234e-07, |
|
"loss": 0.5213, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.6102635228848821, |
|
"grad_norm": 0.5309900045394897, |
|
"learning_rate": 2.399265378329694e-07, |
|
"loss": 0.5984, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.611373092926491, |
|
"grad_norm": 0.28028154373168945, |
|
"learning_rate": 2.388382037266504e-07, |
|
"loss": 0.5899, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.6124826629680998, |
|
"grad_norm": 0.3442562520503998, |
|
"learning_rate": 2.3775008153578108e-07, |
|
"loss": 0.4739, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.6135922330097088, |
|
"grad_norm": 0.38204333186149597, |
|
"learning_rate": 2.366621919192141e-07, |
|
"loss": 0.4846, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.6147018030513176, |
|
"grad_norm": 0.4290561378002167, |
|
"learning_rate": 2.3557455553138645e-07, |
|
"loss": 0.5242, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.6158113730929264, |
|
"grad_norm": 0.33580636978149414, |
|
"learning_rate": 2.3448719302192729e-07, |
|
"loss": 0.4827, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.6169209431345354, |
|
"grad_norm": 0.3730616569519043, |
|
"learning_rate": 2.3340012503526607e-07, |
|
"loss": 0.55, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.6180305131761442, |
|
"grad_norm": 0.5171761512756348, |
|
"learning_rate": 2.323133722102404e-07, |
|
"loss": 0.514, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.6191400832177532, |
|
"grad_norm": 0.328640878200531, |
|
"learning_rate": 2.3122695517970434e-07, |
|
"loss": 0.5019, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.620249653259362, |
|
"grad_norm": 0.34641680121421814, |
|
"learning_rate": 2.3014089457013675e-07, |
|
"loss": 0.5429, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.6213592233009708, |
|
"grad_norm": 0.3420324921607971, |
|
"learning_rate": 2.2905521100124935e-07, |
|
"loss": 0.4482, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.6224687933425798, |
|
"grad_norm": 0.44696226716041565, |
|
"learning_rate": 2.2796992508559563e-07, |
|
"loss": 0.5247, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.6235783633841886, |
|
"grad_norm": 0.3765786290168762, |
|
"learning_rate": 2.2688505742817916e-07, |
|
"loss": 0.5924, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.6246879334257975, |
|
"grad_norm": 0.32200539112091064, |
|
"learning_rate": 2.258006286260626e-07, |
|
"loss": 0.5605, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.6257975034674064, |
|
"grad_norm": 0.455307275056839, |
|
"learning_rate": 2.2471665926797676e-07, |
|
"loss": 0.5314, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.6269070735090153, |
|
"grad_norm": 0.25619956851005554, |
|
"learning_rate": 2.2363316993392932e-07, |
|
"loss": 0.551, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.6280166435506241, |
|
"grad_norm": 0.3347276747226715, |
|
"learning_rate": 2.225501811948145e-07, |
|
"loss": 0.4912, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.629126213592233, |
|
"grad_norm": 0.6607474088668823, |
|
"learning_rate": 2.2146771361202215e-07, |
|
"loss": 0.5319, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.6302357836338419, |
|
"grad_norm": 0.39210301637649536, |
|
"learning_rate": 2.203857877370477e-07, |
|
"loss": 0.4812, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.6313453536754507, |
|
"grad_norm": 0.36486566066741943, |
|
"learning_rate": 2.193044241111018e-07, |
|
"loss": 0.5066, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.6324549237170597, |
|
"grad_norm": 0.25275692343711853, |
|
"learning_rate": 2.182236432647204e-07, |
|
"loss": 0.4351, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.6335644937586685, |
|
"grad_norm": 0.32072117924690247, |
|
"learning_rate": 2.1714346571737485e-07, |
|
"loss": 0.4902, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.6346740638002774, |
|
"grad_norm": 0.3688344657421112, |
|
"learning_rate": 2.160639119770824e-07, |
|
"loss": 0.4802, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.6357836338418863, |
|
"grad_norm": 0.3499152958393097, |
|
"learning_rate": 2.1498500254001683e-07, |
|
"loss": 0.5426, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.6368932038834951, |
|
"grad_norm": 0.5433036088943481, |
|
"learning_rate": 2.1390675789011945e-07, |
|
"loss": 0.5413, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.6380027739251041, |
|
"grad_norm": 0.32321110367774963, |
|
"learning_rate": 2.128291984987099e-07, |
|
"loss": 0.5557, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.6391123439667129, |
|
"grad_norm": 0.3584541380405426, |
|
"learning_rate": 2.117523448240977e-07, |
|
"loss": 0.6089, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.6402219140083217, |
|
"grad_norm": 0.35222071409225464, |
|
"learning_rate": 2.1067621731119384e-07, |
|
"loss": 0.4796, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.6413314840499307, |
|
"grad_norm": 0.3017160892486572, |
|
"learning_rate": 2.0960083639112243e-07, |
|
"loss": 0.4427, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.6424410540915395, |
|
"grad_norm": 0.3295774459838867, |
|
"learning_rate": 2.0852622248083308e-07, |
|
"loss": 0.4628, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.6435506241331485, |
|
"grad_norm": 0.5670339465141296, |
|
"learning_rate": 2.0745239598271312e-07, |
|
"loss": 0.5061, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.6446601941747573, |
|
"grad_norm": 0.3149929344654083, |
|
"learning_rate": 2.0637937728420008e-07, |
|
"loss": 0.4442, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.6457697642163661, |
|
"grad_norm": 0.5195226669311523, |
|
"learning_rate": 2.0530718675739488e-07, |
|
"loss": 0.5651, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.6468793342579751, |
|
"grad_norm": 0.34720999002456665, |
|
"learning_rate": 2.0423584475867504e-07, |
|
"loss": 0.5341, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.6479889042995839, |
|
"grad_norm": 0.41216403245925903, |
|
"learning_rate": 2.0316537162830784e-07, |
|
"loss": 0.4756, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.6490984743411928, |
|
"grad_norm": 0.4366797208786011, |
|
"learning_rate": 2.020957876900648e-07, |
|
"loss": 0.5297, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.6502080443828017, |
|
"grad_norm": 0.3360641598701477, |
|
"learning_rate": 2.0102711325083513e-07, |
|
"loss": 0.6328, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.6513176144244105, |
|
"grad_norm": 0.37930089235305786, |
|
"learning_rate": 1.999593686002406e-07, |
|
"loss": 0.5153, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.6524271844660194, |
|
"grad_norm": 0.44946834444999695, |
|
"learning_rate": 1.9889257401025015e-07, |
|
"loss": 0.5232, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.6535367545076283, |
|
"grad_norm": 0.43730628490448, |
|
"learning_rate": 1.978267497347951e-07, |
|
"loss": 0.4573, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.6546463245492372, |
|
"grad_norm": 0.4298498332500458, |
|
"learning_rate": 1.9676191600938474e-07, |
|
"loss": 0.3999, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.655755894590846, |
|
"grad_norm": 0.3248540759086609, |
|
"learning_rate": 1.9569809305072177e-07, |
|
"loss": 0.5563, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.6568654646324549, |
|
"grad_norm": 0.3533152639865875, |
|
"learning_rate": 1.9463530105631877e-07, |
|
"loss": 0.5788, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.6579750346740638, |
|
"grad_norm": 0.4187324047088623, |
|
"learning_rate": 1.9357356020411475e-07, |
|
"loss": 0.4424, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.6590846047156727, |
|
"grad_norm": 0.39432525634765625, |
|
"learning_rate": 1.925128906520917e-07, |
|
"loss": 0.5991, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.6601941747572816, |
|
"grad_norm": 0.5896446108818054, |
|
"learning_rate": 1.9145331253789253e-07, |
|
"loss": 0.5596, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.6613037447988904, |
|
"grad_norm": 0.36868834495544434, |
|
"learning_rate": 1.90394845978438e-07, |
|
"loss": 0.5265, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.6624133148404993, |
|
"grad_norm": 0.386819452047348, |
|
"learning_rate": 1.8933751106954535e-07, |
|
"loss": 0.5376, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.6635228848821082, |
|
"grad_norm": 0.3883580267429352, |
|
"learning_rate": 1.8828132788554638e-07, |
|
"loss": 0.4808, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.664632454923717, |
|
"grad_norm": 0.6747732162475586, |
|
"learning_rate": 1.8722631647890657e-07, |
|
"loss": 0.4478, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.665742024965326, |
|
"grad_norm": 0.2752344012260437, |
|
"learning_rate": 1.8617249687984434e-07, |
|
"loss": 0.5043, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6668515950069348, |
|
"grad_norm": 0.39787495136260986, |
|
"learning_rate": 1.8511988909595067e-07, |
|
"loss": 0.4899, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.6679611650485436, |
|
"grad_norm": 0.4791197180747986, |
|
"learning_rate": 1.8406851311180926e-07, |
|
"loss": 0.5389, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.6690707350901526, |
|
"grad_norm": 0.3360481858253479, |
|
"learning_rate": 1.8301838888861709e-07, |
|
"loss": 0.4804, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.6701803051317614, |
|
"grad_norm": 0.3347693681716919, |
|
"learning_rate": 1.819695363638055e-07, |
|
"loss": 0.558, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.6712898751733704, |
|
"grad_norm": 0.30149486660957336, |
|
"learning_rate": 1.809219754506618e-07, |
|
"loss": 0.4088, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.6723994452149792, |
|
"grad_norm": 0.30244728922843933, |
|
"learning_rate": 1.7987572603795078e-07, |
|
"loss": 0.5592, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.673509015256588, |
|
"grad_norm": 0.3256188929080963, |
|
"learning_rate": 1.7883080798953754e-07, |
|
"loss": 0.6117, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.674618585298197, |
|
"grad_norm": 0.36098477244377136, |
|
"learning_rate": 1.777872411440101e-07, |
|
"loss": 0.4261, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.6757281553398058, |
|
"grad_norm": 0.33403322100639343, |
|
"learning_rate": 1.767450453143029e-07, |
|
"loss": 0.503, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.6768377253814147, |
|
"grad_norm": 0.5139286518096924, |
|
"learning_rate": 1.757042402873205e-07, |
|
"loss": 0.44, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.6779472954230236, |
|
"grad_norm": 0.368712842464447, |
|
"learning_rate": 1.7466484582356212e-07, |
|
"loss": 0.4188, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.6790568654646325, |
|
"grad_norm": 0.3538356423377991, |
|
"learning_rate": 1.736268816567461e-07, |
|
"loss": 0.4575, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.6801664355062413, |
|
"grad_norm": 0.30539965629577637, |
|
"learning_rate": 1.725903674934357e-07, |
|
"loss": 0.4657, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.6812760055478502, |
|
"grad_norm": 0.2921498417854309, |
|
"learning_rate": 1.715553230126645e-07, |
|
"loss": 0.5406, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.6823855755894591, |
|
"grad_norm": 1.0049635171890259, |
|
"learning_rate": 1.705217678655633e-07, |
|
"loss": 0.5792, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.683495145631068, |
|
"grad_norm": 0.3126891851425171, |
|
"learning_rate": 1.6948972167498649e-07, |
|
"loss": 0.4519, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.6846047156726769, |
|
"grad_norm": 0.36534181237220764, |
|
"learning_rate": 1.684592040351398e-07, |
|
"loss": 0.4744, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 0.32556411623954773, |
|
"learning_rate": 1.674302345112083e-07, |
|
"loss": 0.5786, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.6868238557558946, |
|
"grad_norm": 0.42282480001449585, |
|
"learning_rate": 1.664028326389847e-07, |
|
"loss": 0.6001, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.6879334257975035, |
|
"grad_norm": 0.2856026887893677, |
|
"learning_rate": 1.6537701792449882e-07, |
|
"loss": 0.4948, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.6890429958391123, |
|
"grad_norm": 0.30700036883354187, |
|
"learning_rate": 1.6435280984364692e-07, |
|
"loss": 0.515, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.6901525658807213, |
|
"grad_norm": 0.4423519968986511, |
|
"learning_rate": 1.633302278418221e-07, |
|
"loss": 0.4761, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.6912621359223301, |
|
"grad_norm": 0.3314474821090698, |
|
"learning_rate": 1.6230929133354506e-07, |
|
"loss": 0.5463, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.6923717059639389, |
|
"grad_norm": 0.5163138508796692, |
|
"learning_rate": 1.6129001970209552e-07, |
|
"loss": 0.4718, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.6934812760055479, |
|
"grad_norm": 0.3462437093257904, |
|
"learning_rate": 1.6027243229914414e-07, |
|
"loss": 0.4545, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.6945908460471567, |
|
"grad_norm": 0.32218673825263977, |
|
"learning_rate": 1.5925654844438536e-07, |
|
"loss": 0.5148, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.6957004160887656, |
|
"grad_norm": 0.44593995809555054, |
|
"learning_rate": 1.582423874251703e-07, |
|
"loss": 0.4836, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.6968099861303745, |
|
"grad_norm": 0.3792048990726471, |
|
"learning_rate": 1.5722996849614066e-07, |
|
"loss": 0.5882, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.6979195561719833, |
|
"grad_norm": 0.33511149883270264, |
|
"learning_rate": 1.5621931087886324e-07, |
|
"loss": 0.5293, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.6990291262135923, |
|
"grad_norm": 0.4749152660369873, |
|
"learning_rate": 1.5521043376146494e-07, |
|
"loss": 0.4484, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.7001386962552011, |
|
"grad_norm": 0.36789247393608093, |
|
"learning_rate": 1.5420335629826856e-07, |
|
"loss": 0.5205, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.70124826629681, |
|
"grad_norm": 0.3322623074054718, |
|
"learning_rate": 1.5319809760942896e-07, |
|
"loss": 0.4483, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.7023578363384189, |
|
"grad_norm": 0.3089485466480255, |
|
"learning_rate": 1.5219467678057017e-07, |
|
"loss": 0.4467, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.7034674063800277, |
|
"grad_norm": 0.3379858136177063, |
|
"learning_rate": 1.511931128624231e-07, |
|
"loss": 0.5278, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.7045769764216366, |
|
"grad_norm": 0.3589012622833252, |
|
"learning_rate": 1.5019342487046355e-07, |
|
"loss": 0.4508, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.7056865464632455, |
|
"grad_norm": 0.36919447779655457, |
|
"learning_rate": 1.4919563178455153e-07, |
|
"loss": 0.4822, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.7067961165048544, |
|
"grad_norm": 0.3971253037452698, |
|
"learning_rate": 1.4819975254857066e-07, |
|
"loss": 0.5558, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.7079056865464632, |
|
"grad_norm": 0.37145859003067017, |
|
"learning_rate": 1.472058060700689e-07, |
|
"loss": 0.5777, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.7090152565880721, |
|
"grad_norm": 0.33810412883758545, |
|
"learning_rate": 1.46213811219899e-07, |
|
"loss": 0.4974, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.710124826629681, |
|
"grad_norm": 0.28394588828086853, |
|
"learning_rate": 1.452237868318606e-07, |
|
"loss": 0.4865, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.7112343966712898, |
|
"grad_norm": 0.6261323690414429, |
|
"learning_rate": 1.4423575170234267e-07, |
|
"loss": 0.5135, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.7123439667128988, |
|
"grad_norm": 0.5439554452896118, |
|
"learning_rate": 1.4324972458996638e-07, |
|
"loss": 0.596, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.7134535367545076, |
|
"grad_norm": 0.37857553362846375, |
|
"learning_rate": 1.422657242152293e-07, |
|
"loss": 0.6137, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.7145631067961165, |
|
"grad_norm": 0.5687951445579529, |
|
"learning_rate": 1.4128376926014957e-07, |
|
"loss": 0.425, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.7156726768377254, |
|
"grad_norm": 0.39318007230758667, |
|
"learning_rate": 1.4030387836791164e-07, |
|
"loss": 0.4716, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.7167822468793342, |
|
"grad_norm": 0.4043221175670624, |
|
"learning_rate": 1.3932607014251218e-07, |
|
"loss": 0.5187, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.7178918169209432, |
|
"grad_norm": 0.2890317440032959, |
|
"learning_rate": 1.3835036314840643e-07, |
|
"loss": 0.5747, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.719001386962552, |
|
"grad_norm": 0.2909344434738159, |
|
"learning_rate": 1.3737677591015657e-07, |
|
"loss": 0.4737, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.7201109570041608, |
|
"grad_norm": 0.373444139957428, |
|
"learning_rate": 1.364053269120791e-07, |
|
"loss": 0.5127, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.7212205270457698, |
|
"grad_norm": 0.3974605202674866, |
|
"learning_rate": 1.3543603459789466e-07, |
|
"loss": 0.4898, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.7223300970873786, |
|
"grad_norm": 0.3636089563369751, |
|
"learning_rate": 1.3446891737037762e-07, |
|
"loss": 0.5415, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.7234396671289876, |
|
"grad_norm": 0.38493213057518005, |
|
"learning_rate": 1.3350399359100623e-07, |
|
"loss": 0.4693, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.7245492371705964, |
|
"grad_norm": 0.3705246150493622, |
|
"learning_rate": 1.3254128157961486e-07, |
|
"loss": 0.5556, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.7256588072122053, |
|
"grad_norm": 0.501947820186615, |
|
"learning_rate": 1.3158079961404534e-07, |
|
"loss": 0.5332, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.7267683772538142, |
|
"grad_norm": 0.2879910171031952, |
|
"learning_rate": 1.3062256592980064e-07, |
|
"loss": 0.5306, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.727877947295423, |
|
"grad_norm": 0.6388247013092041, |
|
"learning_rate": 1.296665987196983e-07, |
|
"loss": 0.4829, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.7289875173370319, |
|
"grad_norm": 0.34465721249580383, |
|
"learning_rate": 1.2871291613352477e-07, |
|
"loss": 0.4307, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.7300970873786408, |
|
"grad_norm": 0.34914347529411316, |
|
"learning_rate": 1.2776153627769159e-07, |
|
"loss": 0.5307, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.7312066574202497, |
|
"grad_norm": 0.3551192283630371, |
|
"learning_rate": 1.2681247721489074e-07, |
|
"loss": 0.5591, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.7323162274618585, |
|
"grad_norm": 0.43952858448028564, |
|
"learning_rate": 1.2586575696375238e-07, |
|
"loss": 0.5065, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.7334257975034674, |
|
"grad_norm": 0.4221738576889038, |
|
"learning_rate": 1.249213934985025e-07, |
|
"loss": 0.5212, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.7345353675450763, |
|
"grad_norm": 0.5308628678321838, |
|
"learning_rate": 1.2397940474862144e-07, |
|
"loss": 0.4936, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.7356449375866851, |
|
"grad_norm": 0.45746496319770813, |
|
"learning_rate": 1.2303980859850402e-07, |
|
"loss": 0.4479, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.7367545076282941, |
|
"grad_norm": 0.38960978388786316, |
|
"learning_rate": 1.2210262288711933e-07, |
|
"loss": 0.4848, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.7378640776699029, |
|
"grad_norm": 0.7810402512550354, |
|
"learning_rate": 1.2116786540767267e-07, |
|
"loss": 0.4522, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.7389736477115117, |
|
"grad_norm": 0.7264504432678223, |
|
"learning_rate": 1.2023555390726748e-07, |
|
"loss": 0.5517, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.7400832177531207, |
|
"grad_norm": 0.3533945381641388, |
|
"learning_rate": 1.1930570608656803e-07, |
|
"loss": 0.4049, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.7411927877947295, |
|
"grad_norm": 0.25890418887138367, |
|
"learning_rate": 1.183783395994641e-07, |
|
"loss": 0.5448, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.7423023578363385, |
|
"grad_norm": 0.279067724943161, |
|
"learning_rate": 1.1745347205273506e-07, |
|
"loss": 0.5113, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.7434119278779473, |
|
"grad_norm": 0.31982362270355225, |
|
"learning_rate": 1.1653112100571619e-07, |
|
"loss": 0.5634, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.7445214979195561, |
|
"grad_norm": 0.3901461064815521, |
|
"learning_rate": 1.1561130396996508e-07, |
|
"loss": 0.5766, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.7456310679611651, |
|
"grad_norm": 0.32104188203811646, |
|
"learning_rate": 1.146940384089288e-07, |
|
"loss": 0.4248, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.7467406380027739, |
|
"grad_norm": 0.27771735191345215, |
|
"learning_rate": 1.1377934173761311e-07, |
|
"loss": 0.4721, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.7478502080443828, |
|
"grad_norm": 0.4484061300754547, |
|
"learning_rate": 1.1286723132225095e-07, |
|
"loss": 0.4968, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.7489597780859917, |
|
"grad_norm": 0.2738656997680664, |
|
"learning_rate": 1.1195772447997348e-07, |
|
"loss": 0.5468, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.7500693481276005, |
|
"grad_norm": 0.3913407325744629, |
|
"learning_rate": 1.1105083847848101e-07, |
|
"loss": 0.5727, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.7511789181692095, |
|
"grad_norm": 0.46406638622283936, |
|
"learning_rate": 1.1014659053571476e-07, |
|
"loss": 0.4827, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 0.7522884882108183, |
|
"grad_norm": 0.30459386110305786, |
|
"learning_rate": 1.092449978195308e-07, |
|
"loss": 0.5731, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 0.7533980582524272, |
|
"grad_norm": 0.4219912588596344, |
|
"learning_rate": 1.0834607744737329e-07, |
|
"loss": 0.5629, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 0.7545076282940361, |
|
"grad_norm": 0.5843199491500854, |
|
"learning_rate": 1.0744984648595006e-07, |
|
"loss": 0.5359, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.7556171983356449, |
|
"grad_norm": 0.31454548239707947, |
|
"learning_rate": 1.0655632195090822e-07, |
|
"loss": 0.4659, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 0.7567267683772538, |
|
"grad_norm": 0.4814988374710083, |
|
"learning_rate": 1.0566552080651133e-07, |
|
"loss": 0.4961, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.7578363384188627, |
|
"grad_norm": 0.32601091265678406, |
|
"learning_rate": 1.0477745996531739e-07, |
|
"loss": 0.4892, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 0.7589459084604716, |
|
"grad_norm": 0.46707651019096375, |
|
"learning_rate": 1.0389215628785725e-07, |
|
"loss": 0.4755, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 0.7600554785020804, |
|
"grad_norm": 0.31303638219833374, |
|
"learning_rate": 1.0300962658231521e-07, |
|
"loss": 0.4734, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.7611650485436893, |
|
"grad_norm": 0.3001532554626465, |
|
"learning_rate": 1.0212988760420918e-07, |
|
"loss": 0.5897, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 0.7622746185852982, |
|
"grad_norm": 0.26135823130607605, |
|
"learning_rate": 1.0125295605607324e-07, |
|
"loss": 0.5347, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 0.763384188626907, |
|
"grad_norm": 0.5185014009475708, |
|
"learning_rate": 1.0037884858714012e-07, |
|
"loss": 0.4531, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.764493758668516, |
|
"grad_norm": 0.4258882999420166, |
|
"learning_rate": 9.950758179302504e-08, |
|
"loss": 0.5889, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 0.7656033287101248, |
|
"grad_norm": 0.2963704466819763, |
|
"learning_rate": 9.863917221541104e-08, |
|
"loss": 0.4763, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.7667128987517337, |
|
"grad_norm": 0.41408637166023254, |
|
"learning_rate": 9.777363634173436e-08, |
|
"loss": 0.4918, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 0.7678224687933426, |
|
"grad_norm": 0.563586950302124, |
|
"learning_rate": 9.691099060487196e-08, |
|
"loss": 0.5427, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 0.7689320388349514, |
|
"grad_norm": 0.41873815655708313, |
|
"learning_rate": 9.605125138282935e-08, |
|
"loss": 0.4846, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 0.7700416088765604, |
|
"grad_norm": 0.333218514919281, |
|
"learning_rate": 9.519443499842919e-08, |
|
"loss": 0.4646, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 0.7711511789181692, |
|
"grad_norm": 0.3226572573184967, |
|
"learning_rate": 9.434055771900227e-08, |
|
"loss": 0.5374, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.772260748959778, |
|
"grad_norm": 0.3277279734611511, |
|
"learning_rate": 9.348963575607771e-08, |
|
"loss": 0.5319, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 0.773370319001387, |
|
"grad_norm": 0.3328832983970642, |
|
"learning_rate": 9.264168526507593e-08, |
|
"loss": 0.593, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 0.7744798890429958, |
|
"grad_norm": 0.4502674341201782, |
|
"learning_rate": 9.179672234500166e-08, |
|
"loss": 0.4532, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 0.7755894590846047, |
|
"grad_norm": 0.3654020130634308, |
|
"learning_rate": 9.095476303813796e-08, |
|
"loss": 0.4858, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 0.7766990291262136, |
|
"grad_norm": 0.2795443832874298, |
|
"learning_rate": 9.011582332974227e-08, |
|
"loss": 0.4836, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7778085991678225, |
|
"grad_norm": 0.4435333013534546, |
|
"learning_rate": 8.927991914774227e-08, |
|
"loss": 0.5314, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 0.7789181692094314, |
|
"grad_norm": 0.41879114508628845, |
|
"learning_rate": 8.844706636243404e-08, |
|
"loss": 0.4772, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 0.7800277392510402, |
|
"grad_norm": 0.2757185399532318, |
|
"learning_rate": 8.761728078618049e-08, |
|
"loss": 0.513, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 0.7811373092926491, |
|
"grad_norm": 0.4560401439666748, |
|
"learning_rate": 8.679057817311095e-08, |
|
"loss": 0.5303, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.782246879334258, |
|
"grad_norm": 0.3912280797958374, |
|
"learning_rate": 8.596697421882257e-08, |
|
"loss": 0.4567, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.7833564493758669, |
|
"grad_norm": 0.5057780146598816, |
|
"learning_rate": 8.514648456008173e-08, |
|
"loss": 0.4742, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 0.7844660194174757, |
|
"grad_norm": 0.33308303356170654, |
|
"learning_rate": 8.43291247745277e-08, |
|
"loss": 0.5547, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 0.7855755894590846, |
|
"grad_norm": 0.3485460877418518, |
|
"learning_rate": 8.351491038037662e-08, |
|
"loss": 0.4894, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 0.7866851595006935, |
|
"grad_norm": 0.46615713834762573, |
|
"learning_rate": 8.270385683612674e-08, |
|
"loss": 0.3763, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 0.7877947295423023, |
|
"grad_norm": 0.3317703902721405, |
|
"learning_rate": 8.189597954026539e-08, |
|
"loss": 0.4526, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.7889042995839113, |
|
"grad_norm": 0.4380096197128296, |
|
"learning_rate": 8.1091293830976e-08, |
|
"loss": 0.5891, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 0.7900138696255201, |
|
"grad_norm": 0.3943984806537628, |
|
"learning_rate": 8.028981498584745e-08, |
|
"loss": 0.563, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 0.791123439667129, |
|
"grad_norm": 0.3383914828300476, |
|
"learning_rate": 7.949155822158385e-08, |
|
"loss": 0.5196, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.7922330097087379, |
|
"grad_norm": 0.33651596307754517, |
|
"learning_rate": 7.869653869371528e-08, |
|
"loss": 0.5427, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 0.7933425797503467, |
|
"grad_norm": 0.42295658588409424, |
|
"learning_rate": 7.790477149631072e-08, |
|
"loss": 0.5018, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.7944521497919557, |
|
"grad_norm": 0.39132261276245117, |
|
"learning_rate": 7.711627166169073e-08, |
|
"loss": 0.4734, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 0.7955617198335645, |
|
"grad_norm": 0.36338910460472107, |
|
"learning_rate": 7.633105416014277e-08, |
|
"loss": 0.4265, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 0.7966712898751733, |
|
"grad_norm": 0.3046381175518036, |
|
"learning_rate": 7.554913389963646e-08, |
|
"loss": 0.4241, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 0.7977808599167823, |
|
"grad_norm": 0.3695002794265747, |
|
"learning_rate": 7.477052572554065e-08, |
|
"loss": 0.4685, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 0.7988904299583911, |
|
"grad_norm": 0.3680543601512909, |
|
"learning_rate": 7.399524442034188e-08, |
|
"loss": 0.5151, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.4030877649784088, |
|
"learning_rate": 7.322330470336313e-08, |
|
"loss": 0.568, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 0.8011095700416089, |
|
"grad_norm": 0.5655102729797363, |
|
"learning_rate": 7.245472123048499e-08, |
|
"loss": 0.4919, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 0.8022191400832177, |
|
"grad_norm": 0.3112393319606781, |
|
"learning_rate": 7.168950859386714e-08, |
|
"loss": 0.5639, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 0.8033287101248267, |
|
"grad_norm": 2.190739393234253, |
|
"learning_rate": 7.092768132167098e-08, |
|
"loss": 0.5222, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 0.8044382801664355, |
|
"grad_norm": 0.33552542328834534, |
|
"learning_rate": 7.01692538777845e-08, |
|
"loss": 0.5515, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.8055478502080444, |
|
"grad_norm": 0.5092620253562927, |
|
"learning_rate": 6.941424066154697e-08, |
|
"loss": 0.6103, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 0.8066574202496533, |
|
"grad_norm": 0.4809440076351166, |
|
"learning_rate": 6.866265600747604e-08, |
|
"loss": 0.5302, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 0.8077669902912621, |
|
"grad_norm": 0.41583776473999023, |
|
"learning_rate": 6.79145141849955e-08, |
|
"loss": 0.5071, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 0.808876560332871, |
|
"grad_norm": 0.3800281882286072, |
|
"learning_rate": 6.716982939816398e-08, |
|
"loss": 0.42, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 0.8099861303744799, |
|
"grad_norm": 0.32890620827674866, |
|
"learning_rate": 6.642861578540595e-08, |
|
"loss": 0.422, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.8110957004160888, |
|
"grad_norm": 0.33213478326797485, |
|
"learning_rate": 6.569088741924261e-08, |
|
"loss": 0.4859, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 0.8122052704576976, |
|
"grad_norm": 0.2846478521823883, |
|
"learning_rate": 6.495665830602518e-08, |
|
"loss": 0.5174, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 0.8133148404993065, |
|
"grad_norm": 0.5666544437408447, |
|
"learning_rate": 6.42259423856689e-08, |
|
"loss": 0.4581, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 0.8144244105409154, |
|
"grad_norm": 0.39947426319122314, |
|
"learning_rate": 6.349875353138801e-08, |
|
"loss": 0.4929, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 0.8155339805825242, |
|
"grad_norm": 0.7812756299972534, |
|
"learning_rate": 6.277510554943294e-08, |
|
"loss": 0.5503, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.8166435506241332, |
|
"grad_norm": 0.2935800850391388, |
|
"learning_rate": 6.205501217882766e-08, |
|
"loss": 0.5464, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.817753120665742, |
|
"grad_norm": 0.5092408061027527, |
|
"learning_rate": 6.13384870911092e-08, |
|
"loss": 0.515, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 0.8188626907073508, |
|
"grad_norm": 0.3239974081516266, |
|
"learning_rate": 6.062554389006794e-08, |
|
"loss": 0.5617, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 0.8199722607489598, |
|
"grad_norm": 0.3289891481399536, |
|
"learning_rate": 5.991619611148918e-08, |
|
"loss": 0.4832, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 0.8210818307905686, |
|
"grad_norm": 0.37206193804740906, |
|
"learning_rate": 5.9210457222896524e-08, |
|
"loss": 0.4863, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.8221914008321776, |
|
"grad_norm": 0.3518655300140381, |
|
"learning_rate": 5.850834062329574e-08, |
|
"loss": 0.4942, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 0.8233009708737864, |
|
"grad_norm": 0.3711952567100525, |
|
"learning_rate": 5.780985964292079e-08, |
|
"loss": 0.5641, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 0.8244105409153952, |
|
"grad_norm": 0.41170036792755127, |
|
"learning_rate": 5.711502754298059e-08, |
|
"loss": 0.4882, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 0.8255201109570042, |
|
"grad_norm": 0.5306410193443298, |
|
"learning_rate": 5.6423857515406876e-08, |
|
"loss": 0.5864, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.826629680998613, |
|
"grad_norm": 0.34095829725265503, |
|
"learning_rate": 5.573636268260451e-08, |
|
"loss": 0.5834, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.827739251040222, |
|
"grad_norm": 0.245326429605484, |
|
"learning_rate": 5.5052556097201525e-08, |
|
"loss": 0.4505, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 0.8288488210818308, |
|
"grad_norm": 0.514102041721344, |
|
"learning_rate": 5.437245074180191e-08, |
|
"loss": 0.4891, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 0.8299583911234397, |
|
"grad_norm": 0.8509578704833984, |
|
"learning_rate": 5.369605952873887e-08, |
|
"loss": 0.6081, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 0.8310679611650486, |
|
"grad_norm": 0.35718920826911926, |
|
"learning_rate": 5.302339529982961e-08, |
|
"loss": 0.5393, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 0.8321775312066574, |
|
"grad_norm": 0.35100287199020386, |
|
"learning_rate": 5.2354470826131785e-08, |
|
"loss": 0.5476, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.8332871012482663, |
|
"grad_norm": 0.37764522433280945, |
|
"learning_rate": 5.168929880770062e-08, |
|
"loss": 0.475, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 0.8343966712898752, |
|
"grad_norm": 0.4380689859390259, |
|
"learning_rate": 5.102789187334827e-08, |
|
"loss": 0.4952, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.8355062413314841, |
|
"grad_norm": 0.40711092948913574, |
|
"learning_rate": 5.0370262580403775e-08, |
|
"loss": 0.4711, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 0.8366158113730929, |
|
"grad_norm": 0.3597396910190582, |
|
"learning_rate": 4.9716423414474515e-08, |
|
"loss": 0.4656, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 0.8377253814147018, |
|
"grad_norm": 0.30235543847084045, |
|
"learning_rate": 4.906638678920963e-08, |
|
"loss": 0.5144, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.8388349514563107, |
|
"grad_norm": 0.3047267198562622, |
|
"learning_rate": 4.842016504606375e-08, |
|
"loss": 0.4962, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 0.8399445214979195, |
|
"grad_norm": 0.296040415763855, |
|
"learning_rate": 4.777777045406314e-08, |
|
"loss": 0.4285, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 0.8410540915395285, |
|
"grad_norm": 0.2601630985736847, |
|
"learning_rate": 4.71392152095727e-08, |
|
"loss": 0.4683, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 0.8421636615811373, |
|
"grad_norm": 0.42486798763275146, |
|
"learning_rate": 4.6504511436064014e-08, |
|
"loss": 0.5188, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 0.8432732316227461, |
|
"grad_norm": 0.4439660310745239, |
|
"learning_rate": 4.587367118388577e-08, |
|
"loss": 0.4948, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.8443828016643551, |
|
"grad_norm": 0.36130619049072266, |
|
"learning_rate": 4.5246706430034445e-08, |
|
"loss": 0.52, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 0.8454923717059639, |
|
"grad_norm": 0.3554399609565735, |
|
"learning_rate": 4.4623629077927296e-08, |
|
"loss": 0.4171, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 0.8466019417475729, |
|
"grad_norm": 0.3487074673175812, |
|
"learning_rate": 4.40044509571762e-08, |
|
"loss": 0.4701, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 0.8477115117891817, |
|
"grad_norm": 0.7752673029899597, |
|
"learning_rate": 4.338918382336296e-08, |
|
"loss": 0.4984, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 0.8488210818307905, |
|
"grad_norm": 0.3020077347755432, |
|
"learning_rate": 4.277783935781637e-08, |
|
"loss": 0.4251, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.8499306518723995, |
|
"grad_norm": 3.607598304748535, |
|
"learning_rate": 4.217042916739011e-08, |
|
"loss": 0.4703, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 0.8510402219140083, |
|
"grad_norm": 0.4934079945087433, |
|
"learning_rate": 4.156696478424279e-08, |
|
"loss": 0.4898, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 0.8521497919556172, |
|
"grad_norm": 0.3440416753292084, |
|
"learning_rate": 4.096745766561857e-08, |
|
"loss": 0.4242, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.8532593619972261, |
|
"grad_norm": 0.43925386667251587, |
|
"learning_rate": 4.0371919193629975e-08, |
|
"loss": 0.5167, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 0.8543689320388349, |
|
"grad_norm": 0.40181154012680054, |
|
"learning_rate": 3.9780360675041675e-08, |
|
"loss": 0.4832, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.8554785020804438, |
|
"grad_norm": 0.49073562026023865, |
|
"learning_rate": 3.9192793341055655e-08, |
|
"loss": 0.4619, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 0.8565880721220527, |
|
"grad_norm": 0.3399178087711334, |
|
"learning_rate": 3.860922834709832e-08, |
|
"loss": 0.4904, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 0.8576976421636616, |
|
"grad_norm": 0.3309305012226105, |
|
"learning_rate": 3.8029676772608324e-08, |
|
"loss": 0.5175, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 0.8588072122052705, |
|
"grad_norm": 0.33893635869026184, |
|
"learning_rate": 3.745414962082655e-08, |
|
"loss": 0.5904, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 0.8599167822468793, |
|
"grad_norm": 0.4869129955768585, |
|
"learning_rate": 3.688265781858707e-08, |
|
"loss": 0.4194, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.8610263522884882, |
|
"grad_norm": 0.4826425015926361, |
|
"learning_rate": 3.631521221610953e-08, |
|
"loss": 0.4774, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 0.8621359223300971, |
|
"grad_norm": 0.4436647295951843, |
|
"learning_rate": 3.575182358679349e-08, |
|
"loss": 0.5091, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 0.863245492371706, |
|
"grad_norm": 0.3870086669921875, |
|
"learning_rate": 3.5192502627013535e-08, |
|
"loss": 0.4934, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 0.8643550624133148, |
|
"grad_norm": 0.3462676405906677, |
|
"learning_rate": 3.463725995591646e-08, |
|
"loss": 0.5185, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 0.8654646324549237, |
|
"grad_norm": 0.3750855028629303, |
|
"learning_rate": 3.408610611521959e-08, |
|
"loss": 0.4889, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.8665742024965326, |
|
"grad_norm": 0.39250943064689636, |
|
"learning_rate": 3.3539051569010376e-08, |
|
"loss": 0.5594, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 0.8676837725381414, |
|
"grad_norm": 0.6177974343299866, |
|
"learning_rate": 3.29961067035483e-08, |
|
"loss": 0.5567, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 0.8687933425797504, |
|
"grad_norm": 0.8788308501243591, |
|
"learning_rate": 3.245728182706695e-08, |
|
"loss": 0.5487, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 0.8699029126213592, |
|
"grad_norm": 0.3534790277481079, |
|
"learning_rate": 3.1922587169578965e-08, |
|
"loss": 0.5047, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.871012482662968, |
|
"grad_norm": 0.7439823746681213, |
|
"learning_rate": 3.1392032882681524e-08, |
|
"loss": 0.619, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.872122052704577, |
|
"grad_norm": 0.43660464882850647, |
|
"learning_rate": 3.086562903936343e-08, |
|
"loss": 0.5613, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 0.8732316227461858, |
|
"grad_norm": 0.2849920988082886, |
|
"learning_rate": 3.0343385633814336e-08, |
|
"loss": 0.5407, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 0.8743411927877948, |
|
"grad_norm": 0.46800124645233154, |
|
"learning_rate": 2.982531258123447e-08, |
|
"loss": 0.5268, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 0.8754507628294036, |
|
"grad_norm": 0.33923402428627014, |
|
"learning_rate": 2.931141971764675e-08, |
|
"loss": 0.5359, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 0.8765603328710125, |
|
"grad_norm": 0.5203589200973511, |
|
"learning_rate": 2.880171679971005e-08, |
|
"loss": 0.4298, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.8776699029126214, |
|
"grad_norm": 0.48814857006073, |
|
"learning_rate": 2.8296213504533596e-08, |
|
"loss": 0.4622, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 0.8787794729542302, |
|
"grad_norm": 0.32717978954315186, |
|
"learning_rate": 2.779491942949369e-08, |
|
"loss": 0.4351, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 0.8798890429958391, |
|
"grad_norm": 0.33301976323127747, |
|
"learning_rate": 2.7297844092051104e-08, |
|
"loss": 0.4853, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 0.880998613037448, |
|
"grad_norm": 0.42914196848869324, |
|
"learning_rate": 2.680499692957078e-08, |
|
"loss": 0.5133, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 0.8821081830790569, |
|
"grad_norm": 0.3375394344329834, |
|
"learning_rate": 2.6316387299142374e-08, |
|
"loss": 0.514, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.8832177531206657, |
|
"grad_norm": 0.33002445101737976, |
|
"learning_rate": 2.5832024477402543e-08, |
|
"loss": 0.4487, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 0.8843273231622746, |
|
"grad_norm": 0.5125333666801453, |
|
"learning_rate": 2.535191766035913e-08, |
|
"loss": 0.5942, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 0.8854368932038835, |
|
"grad_norm": 0.424376517534256, |
|
"learning_rate": 2.4876075963216226e-08, |
|
"loss": 0.5574, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 0.8865464632454924, |
|
"grad_norm": 1.1631702184677124, |
|
"learning_rate": 2.4404508420201446e-08, |
|
"loss": 0.5152, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 0.8876560332871013, |
|
"grad_norm": 0.42409613728523254, |
|
"learning_rate": 2.3937223984394212e-08, |
|
"loss": 0.5859, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8887656033287101, |
|
"grad_norm": 0.32093319296836853, |
|
"learning_rate": 2.3474231527555595e-08, |
|
"loss": 0.5776, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 0.889875173370319, |
|
"grad_norm": 0.44207271933555603, |
|
"learning_rate": 2.301553983996041e-08, |
|
"loss": 0.5397, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 0.8909847434119279, |
|
"grad_norm": 0.29899469017982483, |
|
"learning_rate": 2.2561157630229673e-08, |
|
"loss": 0.5171, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 0.8920943134535367, |
|
"grad_norm": 0.49811238050460815, |
|
"learning_rate": 2.2111093525165826e-08, |
|
"loss": 0.5837, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 0.8932038834951457, |
|
"grad_norm": 0.36482110619544983, |
|
"learning_rate": 2.1665356069588607e-08, |
|
"loss": 0.5252, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.8943134535367545, |
|
"grad_norm": 0.40325450897216797, |
|
"learning_rate": 2.1223953726172917e-08, |
|
"loss": 0.5157, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.8954230235783633, |
|
"grad_norm": 0.2874036431312561, |
|
"learning_rate": 2.078689487528823e-08, |
|
"loss": 0.5223, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 0.8965325936199723, |
|
"grad_norm": 0.460287481546402, |
|
"learning_rate": 2.0354187814839248e-08, |
|
"loss": 0.6041, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 0.8976421636615811, |
|
"grad_norm": 0.3832845687866211, |
|
"learning_rate": 1.992584076010867e-08, |
|
"loss": 0.5905, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 0.8987517337031901, |
|
"grad_norm": 0.47805336117744446, |
|
"learning_rate": 1.9501861843601114e-08, |
|
"loss": 0.4894, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.8998613037447989, |
|
"grad_norm": 0.34525078535079956, |
|
"learning_rate": 1.9082259114888477e-08, |
|
"loss": 0.555, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 0.9009708737864077, |
|
"grad_norm": 0.37707841396331787, |
|
"learning_rate": 1.8667040540457423e-08, |
|
"loss": 0.4523, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 0.9020804438280167, |
|
"grad_norm": 0.4305242896080017, |
|
"learning_rate": 1.8256214003558035e-08, |
|
"loss": 0.5538, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 0.9031900138696255, |
|
"grad_norm": 0.4085891842842102, |
|
"learning_rate": 1.7849787304054093e-08, |
|
"loss": 0.5101, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 0.9042995839112344, |
|
"grad_norm": 0.44601982831954956, |
|
"learning_rate": 1.7447768158274923e-08, |
|
"loss": 0.4732, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.9054091539528433, |
|
"grad_norm": 0.3422205150127411, |
|
"learning_rate": 1.7050164198869148e-08, |
|
"loss": 0.4478, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.9065187239944521, |
|
"grad_norm": 0.26549020409584045, |
|
"learning_rate": 1.6656982974659563e-08, |
|
"loss": 0.5429, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 0.907628294036061, |
|
"grad_norm": 0.47383949160575867, |
|
"learning_rate": 1.6268231950499727e-08, |
|
"loss": 0.5087, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 0.9087378640776699, |
|
"grad_norm": 1.3218978643417358, |
|
"learning_rate": 1.5883918507132637e-08, |
|
"loss": 0.5044, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 0.9098474341192788, |
|
"grad_norm": 0.3553486168384552, |
|
"learning_rate": 1.550404994105009e-08, |
|
"loss": 0.5442, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.9109570041608877, |
|
"grad_norm": 0.2937772274017334, |
|
"learning_rate": 1.5128633464354584e-08, |
|
"loss": 0.4458, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 0.9120665742024965, |
|
"grad_norm": 0.49511197209358215, |
|
"learning_rate": 1.475767620462215e-08, |
|
"loss": 0.4199, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 0.9131761442441054, |
|
"grad_norm": 0.7402114272117615, |
|
"learning_rate": 1.439118520476701e-08, |
|
"loss": 0.5255, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 0.44214996695518494, |
|
"learning_rate": 1.4029167422908105e-08, |
|
"loss": 0.4961, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 0.9153952843273232, |
|
"grad_norm": 0.42241570353507996, |
|
"learning_rate": 1.3671629732236679e-08, |
|
"loss": 0.5096, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.916504854368932, |
|
"grad_norm": 0.5015203952789307, |
|
"learning_rate": 1.3318578920886003e-08, |
|
"loss": 0.549, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 0.9176144244105409, |
|
"grad_norm": 0.4513172209262848, |
|
"learning_rate": 1.2970021691802475e-08, |
|
"loss": 0.5027, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 0.9187239944521498, |
|
"grad_norm": 0.3667598366737366, |
|
"learning_rate": 1.2625964662618172e-08, |
|
"loss": 0.4524, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 0.9198335644937586, |
|
"grad_norm": 0.3975818157196045, |
|
"learning_rate": 1.2286414365525494e-08, |
|
"loss": 0.4872, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 0.9209431345353676, |
|
"grad_norm": 0.4363032281398773, |
|
"learning_rate": 1.1951377247152867e-08, |
|
"loss": 0.6175, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.9220527045769764, |
|
"grad_norm": 0.2995266318321228, |
|
"learning_rate": 1.162085966844259e-08, |
|
"loss": 0.5223, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 0.9231622746185852, |
|
"grad_norm": 0.29473140835762024, |
|
"learning_rate": 1.1294867904529992e-08, |
|
"loss": 0.5011, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.9242718446601942, |
|
"grad_norm": 0.3131171464920044, |
|
"learning_rate": 1.097340814462408e-08, |
|
"loss": 0.5525, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 0.925381414701803, |
|
"grad_norm": 0.29646238684654236, |
|
"learning_rate": 1.065648649189041e-08, |
|
"loss": 0.4261, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 0.926490984743412, |
|
"grad_norm": 0.3700522482395172, |
|
"learning_rate": 1.0344108963334847e-08, |
|
"loss": 0.4667, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.9276005547850208, |
|
"grad_norm": 0.5135470628738403, |
|
"learning_rate": 1.003628148968963e-08, |
|
"loss": 0.5734, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 0.9287101248266297, |
|
"grad_norm": 0.3333655595779419, |
|
"learning_rate": 9.733009915300628e-09, |
|
"loss": 0.5045, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.9298196948682386, |
|
"grad_norm": 0.5014081001281738, |
|
"learning_rate": 9.434299998016287e-09, |
|
"loss": 0.5693, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 0.9309292649098474, |
|
"grad_norm": 0.33439749479293823, |
|
"learning_rate": 9.140157409078559e-09, |
|
"loss": 0.5434, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 0.9320388349514563, |
|
"grad_norm": 0.28119370341300964, |
|
"learning_rate": 8.850587733014947e-09, |
|
"loss": 0.5789, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.9331484049930652, |
|
"grad_norm": 0.3373952805995941, |
|
"learning_rate": 8.565596467532715e-09, |
|
"loss": 0.4614, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 0.9342579750346741, |
|
"grad_norm": 0.4669179916381836, |
|
"learning_rate": 8.28518902341438e-09, |
|
"loss": 0.5021, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 0.935367545076283, |
|
"grad_norm": 0.45809802412986755, |
|
"learning_rate": 8.009370724415015e-09, |
|
"loss": 0.5104, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 0.9364771151178918, |
|
"grad_norm": 0.40159252285957336, |
|
"learning_rate": 7.738146807161255e-09, |
|
"loss": 0.5569, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 0.9375866851595007, |
|
"grad_norm": 0.34096261858940125, |
|
"learning_rate": 7.471522421051618e-09, |
|
"loss": 0.5477, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.9386962552011096, |
|
"grad_norm": 0.4823736250400543, |
|
"learning_rate": 7.209502628159142e-09, |
|
"loss": 0.4552, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 0.9398058252427185, |
|
"grad_norm": 0.37772753834724426, |
|
"learning_rate": 6.952092403134851e-09, |
|
"loss": 0.4999, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 0.9409153952843273, |
|
"grad_norm": 0.44477227330207825, |
|
"learning_rate": 6.69929663311361e-09, |
|
"loss": 0.596, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.9420249653259362, |
|
"grad_norm": 0.27438193559646606, |
|
"learning_rate": 6.451120117621306e-09, |
|
"loss": 0.483, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 0.9431345353675451, |
|
"grad_norm": 0.3919523060321808, |
|
"learning_rate": 6.2075675684835075e-09, |
|
"loss": 0.4991, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.9442441054091539, |
|
"grad_norm": 0.36142680048942566, |
|
"learning_rate": 5.968643609736257e-09, |
|
"loss": 0.4884, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 0.9453536754507629, |
|
"grad_norm": 0.37626388669013977, |
|
"learning_rate": 5.734352777538143e-09, |
|
"loss": 0.473, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 0.9464632454923717, |
|
"grad_norm": 0.8457902073860168, |
|
"learning_rate": 5.504699520084227e-09, |
|
"loss": 0.4457, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 0.9475728155339805, |
|
"grad_norm": 0.26701247692108154, |
|
"learning_rate": 5.279688197521643e-09, |
|
"loss": 0.506, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 0.9486823855755895, |
|
"grad_norm": 0.33689752221107483, |
|
"learning_rate": 5.059323081866601e-09, |
|
"loss": 0.4893, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.9497919556171983, |
|
"grad_norm": 0.30724218487739563, |
|
"learning_rate": 4.8436083569236004e-09, |
|
"loss": 0.4775, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 0.9509015256588073, |
|
"grad_norm": 0.3921775817871094, |
|
"learning_rate": 4.632548118205681e-09, |
|
"loss": 0.6024, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 0.9520110957004161, |
|
"grad_norm": 0.465593159198761, |
|
"learning_rate": 4.4261463728569315e-09, |
|
"loss": 0.5698, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 0.9531206657420249, |
|
"grad_norm": 0.3770931661128998, |
|
"learning_rate": 4.224407039576244e-09, |
|
"loss": 0.4477, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 0.9542302357836339, |
|
"grad_norm": 0.7365812063217163, |
|
"learning_rate": 4.027333948542932e-09, |
|
"loss": 0.4356, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.9553398058252427, |
|
"grad_norm": 0.4825473427772522, |
|
"learning_rate": 3.834930841344119e-09, |
|
"loss": 0.5003, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 0.9564493758668516, |
|
"grad_norm": 0.4335998296737671, |
|
"learning_rate": 3.6472013709035464e-09, |
|
"loss": 0.5292, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 0.9575589459084605, |
|
"grad_norm": 0.3976069688796997, |
|
"learning_rate": 3.4641491014123224e-09, |
|
"loss": 0.6148, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 0.9586685159500693, |
|
"grad_norm": 0.45377933979034424, |
|
"learning_rate": 3.2857775082613115e-09, |
|
"loss": 0.5478, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.9597780859916782, |
|
"grad_norm": 0.35374292731285095, |
|
"learning_rate": 3.1120899779749354e-09, |
|
"loss": 0.4997, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.9608876560332871, |
|
"grad_norm": 0.9163030385971069, |
|
"learning_rate": 2.9430898081471144e-09, |
|
"loss": 0.5127, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 0.961997226074896, |
|
"grad_norm": 0.4174667000770569, |
|
"learning_rate": 2.7787802073784563e-09, |
|
"loss": 0.4448, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 0.9631067961165048, |
|
"grad_norm": 0.41283664107322693, |
|
"learning_rate": 2.619164295215581e-09, |
|
"loss": 0.5467, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.9642163661581137, |
|
"grad_norm": 0.39738011360168457, |
|
"learning_rate": 2.4642451020916165e-09, |
|
"loss": 0.5459, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 0.9653259361997226, |
|
"grad_norm": 0.4814308285713196, |
|
"learning_rate": 2.314025569268879e-09, |
|
"loss": 0.4956, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.9664355062413315, |
|
"grad_norm": 0.3121536076068878, |
|
"learning_rate": 2.1685085487829493e-09, |
|
"loss": 0.5044, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 0.9675450762829404, |
|
"grad_norm": 0.5314778089523315, |
|
"learning_rate": 2.0276968033884347e-09, |
|
"loss": 0.5479, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 0.9686546463245492, |
|
"grad_norm": 0.49265632033348083, |
|
"learning_rate": 1.8915930065067365e-09, |
|
"loss": 0.4362, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 0.9697642163661581, |
|
"grad_norm": 0.3280515670776367, |
|
"learning_rate": 1.760199742175089e-09, |
|
"loss": 0.4533, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 0.3209471106529236, |
|
"learning_rate": 1.6335195049975992e-09, |
|
"loss": 0.523, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.9719833564493758, |
|
"grad_norm": 0.4209744334220886, |
|
"learning_rate": 1.5115547000978113e-09, |
|
"loss": 0.4551, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 0.9730929264909848, |
|
"grad_norm": 0.4232068359851837, |
|
"learning_rate": 1.3943076430731614e-09, |
|
"loss": 0.4994, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 0.9742024965325936, |
|
"grad_norm": 0.513118326663971, |
|
"learning_rate": 1.2817805599509014e-09, |
|
"loss": 0.5737, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 0.9753120665742026, |
|
"grad_norm": 0.4443225860595703, |
|
"learning_rate": 1.173975587145909e-09, |
|
"loss": 0.5862, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 0.9764216366158114, |
|
"grad_norm": 0.40851354598999023, |
|
"learning_rate": 1.0708947714200557e-09, |
|
"loss": 0.5229, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.9775312066574202, |
|
"grad_norm": 0.2527603209018707, |
|
"learning_rate": 9.725400698434583e-10, |
|
"loss": 0.5171, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 0.9786407766990292, |
|
"grad_norm": 0.43800926208496094, |
|
"learning_rate": 8.789133497571488e-10, |
|
"loss": 0.5315, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 0.979750346740638, |
|
"grad_norm": 0.33943334221839905, |
|
"learning_rate": 7.900163887377964e-10, |
|
"loss": 0.5071, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 0.9808599167822469, |
|
"grad_norm": 0.5179576277732849, |
|
"learning_rate": 7.058508745639014e-10, |
|
"loss": 0.5261, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 0.9819694868238558, |
|
"grad_norm": 0.6271900534629822, |
|
"learning_rate": 6.264184051837096e-10, |
|
"loss": 0.4876, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.9830790568654646, |
|
"grad_norm": 0.5471246838569641, |
|
"learning_rate": 5.517204886848758e-10, |
|
"loss": 0.4974, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 0.9841886269070735, |
|
"grad_norm": 0.30157485604286194, |
|
"learning_rate": 4.817585432659032e-10, |
|
"loss": 0.4899, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 0.9852981969486824, |
|
"grad_norm": 0.7236303091049194, |
|
"learning_rate": 4.1653389720916474e-10, |
|
"loss": 0.5567, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 0.9864077669902913, |
|
"grad_norm": 0.3008541762828827, |
|
"learning_rate": 3.5604778885564567e-10, |
|
"loss": 0.5912, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 0.9875173370319001, |
|
"grad_norm": 0.4916051924228668, |
|
"learning_rate": 3.0030136658157343e-10, |
|
"loss": 0.5003, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.988626907073509, |
|
"grad_norm": 0.34007692337036133, |
|
"learning_rate": 2.492956887764075e-10, |
|
"loss": 0.5146, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 0.9897364771151179, |
|
"grad_norm": 0.35913458466529846, |
|
"learning_rate": 2.0303172382293843e-10, |
|
"loss": 0.5003, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 0.9908460471567268, |
|
"grad_norm": 0.38319680094718933, |
|
"learning_rate": 1.6151035007883062e-10, |
|
"loss": 0.458, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 0.9919556171983357, |
|
"grad_norm": 0.4455585181713104, |
|
"learning_rate": 1.2473235585983012e-10, |
|
"loss": 0.4971, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 0.9930651872399445, |
|
"grad_norm": 0.521392822265625, |
|
"learning_rate": 9.269843942505407e-11, |
|
"loss": 0.5466, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.9941747572815534, |
|
"grad_norm": 0.2978236675262451, |
|
"learning_rate": 6.54092089634739e-11, |
|
"loss": 0.5089, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.9952843273231623, |
|
"grad_norm": 0.4199017584323883, |
|
"learning_rate": 4.286518258250771e-11, |
|
"loss": 0.4961, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 0.9963938973647711, |
|
"grad_norm": 0.39249876141548157, |
|
"learning_rate": 2.506678829819475e-11, |
|
"loss": 0.4764, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 0.9975034674063801, |
|
"grad_norm": 0.41262540221214294, |
|
"learning_rate": 1.2014364026979862e-11, |
|
"loss": 0.5336, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.9986130374479889, |
|
"grad_norm": 0.33444827795028687, |
|
"learning_rate": 3.708157579357385e-12, |
|
"loss": 0.5269, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9997226074895977, |
|
"grad_norm": 0.423446387052536, |
|
"learning_rate": 1.4832665518049737e-13, |
|
"loss": 0.4982, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 18025, |
|
"total_flos": 4.409726465817354e+17, |
|
"train_loss": 0.606607598028302, |
|
"train_runtime": 18158.8522, |
|
"train_samples_per_second": 0.993, |
|
"train_steps_per_second": 0.993 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 18025, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.409726465817354e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|