{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9858605558264264,
  "eval_steps": 500,
  "global_step": 510,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
{ |
|
"epoch": 0.005850804485616773, |
|
"grad_norm": 6.383665084838867, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"loss": 0.207, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011701608971233545, |
|
"grad_norm": 6.675337314605713, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 0.2143, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.017552413456850317, |
|
"grad_norm": 7.4617815017700195, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.2293, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02340321794246709, |
|
"grad_norm": 5.585313320159912, |
|
"learning_rate": 7.843137254901962e-07, |
|
"loss": 0.1861, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02925402242808386, |
|
"grad_norm": 5.630923748016357, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 0.1921, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.035104826913700635, |
|
"grad_norm": 5.787583827972412, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 0.1962, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.040955631399317405, |
|
"grad_norm": 5.5350823402404785, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"loss": 0.1839, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04680643588493418, |
|
"grad_norm": 5.308877944946289, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 0.1818, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05265724037055095, |
|
"grad_norm": 6.017881393432617, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"loss": 0.2044, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05850804485616772, |
|
"grad_norm": 3.9479219913482666, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 0.1677, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0643588493417845, |
|
"grad_norm": 3.2280476093292236, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"loss": 0.1684, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07020965382740127, |
|
"grad_norm": 3.0776169300079346, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 0.1723, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07606045831301804, |
|
"grad_norm": 1.9993395805358887, |
|
"learning_rate": 2.549019607843137e-06, |
|
"loss": 0.159, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08191126279863481, |
|
"grad_norm": 2.2911689281463623, |
|
"learning_rate": 2.7450980392156867e-06, |
|
"loss": 0.1769, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08776206728425158, |
|
"grad_norm": 2.2151801586151123, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.1598, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09361287176986836, |
|
"grad_norm": 2.7120745182037354, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"loss": 0.1625, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09946367625548513, |
|
"grad_norm": 2.8011069297790527, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.1803, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1053144807411019, |
|
"grad_norm": 2.3757805824279785, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 0.1696, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11116528522671867, |
|
"grad_norm": 2.58486270904541, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"loss": 0.1831, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11701608971233544, |
|
"grad_norm": 2.677863121032715, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.1519, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12286689419795221, |
|
"grad_norm": 2.3059568405151367, |
|
"learning_rate": 4.11764705882353e-06, |
|
"loss": 0.1609, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.128717698683569, |
|
"grad_norm": 1.1877626180648804, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 0.155, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.13456850316918575, |
|
"grad_norm": 0.8065866827964783, |
|
"learning_rate": 4.509803921568628e-06, |
|
"loss": 0.1608, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.14041930765480254, |
|
"grad_norm": 0.7501436471939087, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 0.1692, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1462701121404193, |
|
"grad_norm": 0.659354031085968, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 0.1455, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15212091662603608, |
|
"grad_norm": 0.6396603584289551, |
|
"learning_rate": 5.098039215686274e-06, |
|
"loss": 0.1498, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.15797172111165286, |
|
"grad_norm": 0.8220036029815674, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 0.1449, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16382252559726962, |
|
"grad_norm": 0.7791479229927063, |
|
"learning_rate": 5.4901960784313735e-06, |
|
"loss": 0.1428, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1696733300828864, |
|
"grad_norm": 0.9419583678245544, |
|
"learning_rate": 5.686274509803922e-06, |
|
"loss": 0.1395, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.17552413456850316, |
|
"grad_norm": 0.7534624934196472, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.1439, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18137493905411994, |
|
"grad_norm": 0.6648500561714172, |
|
"learning_rate": 6.07843137254902e-06, |
|
"loss": 0.1391, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.18722574353973673, |
|
"grad_norm": 0.7320256233215332, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 0.1461, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.19307654802535348, |
|
"grad_norm": 0.5900516510009766, |
|
"learning_rate": 6.470588235294119e-06, |
|
"loss": 0.1504, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19892735251097027, |
|
"grad_norm": 0.5753044486045837, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1406, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.20477815699658702, |
|
"grad_norm": 0.5507317185401917, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 0.1313, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2106289614822038, |
|
"grad_norm": 0.6329162120819092, |
|
"learning_rate": 7.058823529411766e-06, |
|
"loss": 0.1462, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.21647976596782056, |
|
"grad_norm": 0.5627315640449524, |
|
"learning_rate": 7.2549019607843145e-06, |
|
"loss": 0.1526, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.22233057045343735, |
|
"grad_norm": 0.5392419099807739, |
|
"learning_rate": 7.450980392156863e-06, |
|
"loss": 0.1144, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.22818137493905413, |
|
"grad_norm": 0.6186360716819763, |
|
"learning_rate": 7.647058823529411e-06, |
|
"loss": 0.1379, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2340321794246709, |
|
"grad_norm": 0.5838163495063782, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.1477, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23988298391028767, |
|
"grad_norm": 0.6214900612831116, |
|
"learning_rate": 8.03921568627451e-06, |
|
"loss": 0.1244, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.24573378839590443, |
|
"grad_norm": 0.7451300621032715, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 0.1501, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2515845928815212, |
|
"grad_norm": 0.6587160229682922, |
|
"learning_rate": 8.43137254901961e-06, |
|
"loss": 0.1381, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.257435397367138, |
|
"grad_norm": 0.5924107432365417, |
|
"learning_rate": 8.627450980392157e-06, |
|
"loss": 0.1356, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.26328620185275475, |
|
"grad_norm": 0.5446859002113342, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.1204, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2691370063383715, |
|
"grad_norm": 0.5046283602714539, |
|
"learning_rate": 9.019607843137256e-06, |
|
"loss": 0.1333, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2749878108239883, |
|
"grad_norm": 0.4857878088951111, |
|
"learning_rate": 9.215686274509804e-06, |
|
"loss": 0.1307, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2808386153096051, |
|
"grad_norm": 0.5312994718551636, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 0.1275, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.28668941979522183, |
|
"grad_norm": 0.5524762272834778, |
|
"learning_rate": 9.607843137254903e-06, |
|
"loss": 0.1366, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2925402242808386, |
|
"grad_norm": 0.5190322399139404, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.1419, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2983910287664554, |
|
"grad_norm": 0.46393024921417236, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1392, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.30424183325207216, |
|
"grad_norm": 0.4669331908226013, |
|
"learning_rate": 9.999882884955554e-06, |
|
"loss": 0.117, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3100926377376889, |
|
"grad_norm": 0.49370425939559937, |
|
"learning_rate": 9.999531545308584e-06, |
|
"loss": 0.1359, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3159434422233057, |
|
"grad_norm": 0.4881090223789215, |
|
"learning_rate": 9.998945997517957e-06, |
|
"loss": 0.1303, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3217942467089225, |
|
"grad_norm": 0.5082895755767822, |
|
"learning_rate": 9.998126269014255e-06, |
|
"loss": 0.1311, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.32764505119453924, |
|
"grad_norm": 0.4817737936973572, |
|
"learning_rate": 9.997072398198492e-06, |
|
"loss": 0.1341, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.333495855680156, |
|
"grad_norm": 0.49731704592704773, |
|
"learning_rate": 9.99578443444032e-06, |
|
"loss": 0.1258, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3393466601657728, |
|
"grad_norm": 0.4882417321205139, |
|
"learning_rate": 9.994262438075713e-06, |
|
"loss": 0.1479, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.34519746465138956, |
|
"grad_norm": 0.47016623616218567, |
|
"learning_rate": 9.992506480404137e-06, |
|
"loss": 0.1333, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.3510482691370063, |
|
"grad_norm": 0.4617152214050293, |
|
"learning_rate": 9.990516643685222e-06, |
|
"loss": 0.134, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35689907362262313, |
|
"grad_norm": 0.4279603064060211, |
|
"learning_rate": 9.988293021134888e-06, |
|
"loss": 0.1259, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3627498781082399, |
|
"grad_norm": 0.4004892408847809, |
|
"learning_rate": 9.985835716921e-06, |
|
"loss": 0.1313, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.36860068259385664, |
|
"grad_norm": 0.49121877551078796, |
|
"learning_rate": 9.983144846158472e-06, |
|
"loss": 0.1376, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.37445148707947346, |
|
"grad_norm": 0.543146550655365, |
|
"learning_rate": 9.980220534903889e-06, |
|
"loss": 0.1358, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3803022915650902, |
|
"grad_norm": 0.4203166365623474, |
|
"learning_rate": 9.977062920149583e-06, |
|
"loss": 0.1415, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.38615309605070697, |
|
"grad_norm": 0.5869492888450623, |
|
"learning_rate": 9.973672149817232e-06, |
|
"loss": 0.1245, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3920039005363237, |
|
"grad_norm": 0.38548362255096436, |
|
"learning_rate": 9.970048382750925e-06, |
|
"loss": 0.1349, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.39785470502194054, |
|
"grad_norm": 0.43501177430152893, |
|
"learning_rate": 9.966191788709716e-06, |
|
"loss": 0.1372, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4037055095075573, |
|
"grad_norm": 0.459250807762146, |
|
"learning_rate": 9.96210254835968e-06, |
|
"loss": 0.1482, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.40955631399317405, |
|
"grad_norm": 0.4854992926120758, |
|
"learning_rate": 9.957780853265441e-06, |
|
"loss": 0.139, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41540711847879086, |
|
"grad_norm": 0.4333488941192627, |
|
"learning_rate": 9.953226905881208e-06, |
|
"loss": 0.1181, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4212579229644076, |
|
"grad_norm": 0.43450847268104553, |
|
"learning_rate": 9.948440919541277e-06, |
|
"loss": 0.1328, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4271087274500244, |
|
"grad_norm": 0.4384412467479706, |
|
"learning_rate": 9.943423118450051e-06, |
|
"loss": 0.1405, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.43295953193564113, |
|
"grad_norm": 0.5065910816192627, |
|
"learning_rate": 9.938173737671531e-06, |
|
"loss": 0.1416, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.43881033642125794, |
|
"grad_norm": 0.48674866557121277, |
|
"learning_rate": 9.932693023118299e-06, |
|
"loss": 0.1432, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4446611409068747, |
|
"grad_norm": 0.47392505407333374, |
|
"learning_rate": 9.926981231540007e-06, |
|
"loss": 0.1468, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.45051194539249145, |
|
"grad_norm": 0.43997856974601746, |
|
"learning_rate": 9.921038630511345e-06, |
|
"loss": 0.1523, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.45636274987810826, |
|
"grad_norm": 0.42127934098243713, |
|
"learning_rate": 9.91486549841951e-06, |
|
"loss": 0.1443, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.462213554363725, |
|
"grad_norm": 0.43109971284866333, |
|
"learning_rate": 9.908462124451152e-06, |
|
"loss": 0.1401, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4680643588493418, |
|
"grad_norm": 0.45185860991477966, |
|
"learning_rate": 9.901828808578846e-06, |
|
"loss": 0.1203, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.47391516333495853, |
|
"grad_norm": 0.43880337476730347, |
|
"learning_rate": 9.894965861547023e-06, |
|
"loss": 0.1331, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.47976596782057535, |
|
"grad_norm": 0.45226937532424927, |
|
"learning_rate": 9.887873604857424e-06, |
|
"loss": 0.1374, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4856167723061921, |
|
"grad_norm": 0.4365948438644409, |
|
"learning_rate": 9.88055237075403e-06, |
|
"loss": 0.1412, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.49146757679180886, |
|
"grad_norm": 0.43214043974876404, |
|
"learning_rate": 9.873002502207502e-06, |
|
"loss": 0.1441, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.49731838127742567, |
|
"grad_norm": 0.4197068214416504, |
|
"learning_rate": 9.86522435289912e-06, |
|
"loss": 0.149, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5031691857630424, |
|
"grad_norm": 0.41914910078048706, |
|
"learning_rate": 9.857218287204204e-06, |
|
"loss": 0.1285, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5090199902486592, |
|
"grad_norm": 0.4386901259422302, |
|
"learning_rate": 9.848984680175049e-06, |
|
"loss": 0.1459, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.514870794734276, |
|
"grad_norm": 0.39859330654144287, |
|
"learning_rate": 9.840523917523354e-06, |
|
"loss": 0.146, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5207215992198927, |
|
"grad_norm": 0.4264526665210724, |
|
"learning_rate": 9.831836395602164e-06, |
|
"loss": 0.1254, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5265724037055095, |
|
"grad_norm": 0.4199821352958679, |
|
"learning_rate": 9.822922521387277e-06, |
|
"loss": 0.1376, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5324232081911263, |
|
"grad_norm": 0.41378048062324524, |
|
"learning_rate": 9.813782712458206e-06, |
|
"loss": 0.1306, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.538274012676743, |
|
"grad_norm": 0.4491420090198517, |
|
"learning_rate": 9.804417396978605e-06, |
|
"loss": 0.1333, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5441248171623598, |
|
"grad_norm": 0.48420554399490356, |
|
"learning_rate": 9.794827013676206e-06, |
|
"loss": 0.152, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5499756216479766, |
|
"grad_norm": 0.43099677562713623, |
|
"learning_rate": 9.78501201182228e-06, |
|
"loss": 0.1477, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5558264261335933, |
|
"grad_norm": 0.44019651412963867, |
|
"learning_rate": 9.774972851210572e-06, |
|
"loss": 0.1388, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5616772306192102, |
|
"grad_norm": 0.4409557580947876, |
|
"learning_rate": 9.764710002135784e-06, |
|
"loss": 0.1262, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.567528035104827, |
|
"grad_norm": 0.4218440353870392, |
|
"learning_rate": 9.754223945371524e-06, |
|
"loss": 0.1397, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5733788395904437, |
|
"grad_norm": 0.400522917509079, |
|
"learning_rate": 9.743515172147793e-06, |
|
"loss": 0.1317, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5792296440760605, |
|
"grad_norm": 0.41842326521873474, |
|
"learning_rate": 9.732584184127973e-06, |
|
"loss": 0.1391, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5850804485616772, |
|
"grad_norm": 0.3865659236907959, |
|
"learning_rate": 9.721431493385322e-06, |
|
"loss": 0.1281, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.590931253047294, |
|
"grad_norm": 0.38467201590538025, |
|
"learning_rate": 9.710057622378992e-06, |
|
"loss": 0.1359, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5967820575329108, |
|
"grad_norm": 0.4947831630706787, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.1451, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6026328620185275, |
|
"grad_norm": 0.4190811514854431, |
|
"learning_rate": 9.686648481193994e-06, |
|
"loss": 0.1432, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6084836665041443, |
|
"grad_norm": 0.38624292612075806, |
|
"learning_rate": 9.674614307640368e-06, |
|
"loss": 0.1352, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6143344709897611, |
|
"grad_norm": 0.43240782618522644, |
|
"learning_rate": 9.66236114702178e-06, |
|
"loss": 0.1422, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6201852754753778, |
|
"grad_norm": 0.4440532326698303, |
|
"learning_rate": 9.649889573350006e-06, |
|
"loss": 0.135, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6260360799609946, |
|
"grad_norm": 0.39602479338645935, |
|
"learning_rate": 9.637200170868607e-06, |
|
"loss": 0.1186, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6318868844466115, |
|
"grad_norm": 0.4450910687446594, |
|
"learning_rate": 9.62429353402556e-06, |
|
"loss": 0.1344, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6377376889322282, |
|
"grad_norm": 0.4157448410987854, |
|
"learning_rate": 9.611170267445401e-06, |
|
"loss": 0.1382, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.643588493417845, |
|
"grad_norm": 0.43927744030952454, |
|
"learning_rate": 9.597830985900913e-06, |
|
"loss": 0.132, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6494392979034618, |
|
"grad_norm": 0.40655484795570374, |
|
"learning_rate": 9.584276314284316e-06, |
|
"loss": 0.1366, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6552901023890785, |
|
"grad_norm": 0.574320912361145, |
|
"learning_rate": 9.570506887577994e-06, |
|
"loss": 0.1338, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6611409068746953, |
|
"grad_norm": 0.4128205478191376, |
|
"learning_rate": 9.556523350824759e-06, |
|
"loss": 0.1236, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.666991711360312, |
|
"grad_norm": 0.44907814264297485, |
|
"learning_rate": 9.542326359097619e-06, |
|
"loss": 0.1377, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6728425158459288, |
|
"grad_norm": 0.40679454803466797, |
|
"learning_rate": 9.527916577469104e-06, |
|
"loss": 0.1425, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6786933203315456, |
|
"grad_norm": 0.42478692531585693, |
|
"learning_rate": 9.5132946809801e-06, |
|
"loss": 0.1424, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6845441248171623, |
|
"grad_norm": 0.40595975518226624, |
|
"learning_rate": 9.498461354608228e-06, |
|
"loss": 0.1268, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6903949293027791, |
|
"grad_norm": 0.42049211263656616, |
|
"learning_rate": 9.483417293235759e-06, |
|
"loss": 0.148, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6962457337883959, |
|
"grad_norm": 0.4690849483013153, |
|
"learning_rate": 9.468163201617063e-06, |
|
"loss": 0.1323, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.7020965382740126, |
|
"grad_norm": 0.4121830463409424, |
|
"learning_rate": 9.452699794345583e-06, |
|
"loss": 0.1316, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7079473427596294, |
|
"grad_norm": 0.4223732352256775, |
|
"learning_rate": 9.437027795820373e-06, |
|
"loss": 0.1423, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7137981472452463, |
|
"grad_norm": 0.43553781509399414, |
|
"learning_rate": 9.421147940212152e-06, |
|
"loss": 0.1451, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.719648951730863, |
|
"grad_norm": 0.3980019688606262, |
|
"learning_rate": 9.405060971428924e-06, |
|
"loss": 0.1289, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7254997562164798, |
|
"grad_norm": 0.4621797204017639, |
|
"learning_rate": 9.388767643081109e-06, |
|
"loss": 0.1376, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7313505607020966, |
|
"grad_norm": 0.4204331338405609, |
|
"learning_rate": 9.372268718446259e-06, |
|
"loss": 0.1504, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7372013651877133, |
|
"grad_norm": 0.4406717121601105, |
|
"learning_rate": 9.355564970433288e-06, |
|
"loss": 0.1395, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7430521696733301, |
|
"grad_norm": 0.42246147990226746, |
|
"learning_rate": 9.338657181546277e-06, |
|
"loss": 0.1403, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7489029741589469, |
|
"grad_norm": 0.4052499234676361, |
|
"learning_rate": 9.321546143847802e-06, |
|
"loss": 0.1297, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7547537786445636, |
|
"grad_norm": 0.44214069843292236, |
|
"learning_rate": 9.30423265892184e-06, |
|
"loss": 0.1427, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7606045831301804, |
|
"grad_norm": 0.4202938973903656, |
|
"learning_rate": 9.286717537836211e-06, |
|
"loss": 0.1426, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7664553876157971, |
|
"grad_norm": 0.40408894419670105, |
|
"learning_rate": 9.269001601104593e-06, |
|
"loss": 0.1482, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7723061921014139, |
|
"grad_norm": 0.43469467759132385, |
|
"learning_rate": 9.251085678648072e-06, |
|
"loss": 0.1434, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7781569965870307, |
|
"grad_norm": 0.4173245131969452, |
|
"learning_rate": 9.232970609756267e-06, |
|
"loss": 0.1304, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7840078010726474, |
|
"grad_norm": 0.3956865072250366, |
|
"learning_rate": 9.214657243048021e-06, |
|
"loss": 0.1334, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7898586055582643, |
|
"grad_norm": 0.43496742844581604, |
|
"learning_rate": 9.196146436431635e-06, |
|
"loss": 0.1435, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7957094100438811, |
|
"grad_norm": 0.40430235862731934, |
|
"learning_rate": 9.177439057064684e-06, |
|
"loss": 0.1406, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8015602145294978, |
|
"grad_norm": 0.4011284410953522, |
|
"learning_rate": 9.158535981313395e-06, |
|
"loss": 0.1279, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.8074110190151146, |
|
"grad_norm": 0.4090335965156555, |
|
"learning_rate": 9.13943809471159e-06, |
|
"loss": 0.141, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.8132618235007314, |
|
"grad_norm": 0.41207408905029297, |
|
"learning_rate": 9.120146291919206e-06, |
|
"loss": 0.1324, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.8191126279863481, |
|
"grad_norm": 0.400602787733078, |
|
"learning_rate": 9.100661476680379e-06, |
|
"loss": 0.1338, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8249634324719649, |
|
"grad_norm": 0.4151029884815216, |
|
"learning_rate": 9.08098456178111e-06, |
|
"loss": 0.1352, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8308142369575817, |
|
"grad_norm": 0.4229414463043213, |
|
"learning_rate": 9.061116469006504e-06, |
|
"loss": 0.1402, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8366650414431984, |
|
"grad_norm": 0.42491304874420166, |
|
"learning_rate": 9.041058129097586e-06, |
|
"loss": 0.1234, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8425158459288152, |
|
"grad_norm": 0.4292084872722626, |
|
"learning_rate": 9.020810481707709e-06, |
|
"loss": 0.1308, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8483666504144319, |
|
"grad_norm": 0.43276500701904297, |
|
"learning_rate": 9.00037447535852e-06, |
|
"loss": 0.1492, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8542174549000487, |
|
"grad_norm": 0.42224961519241333, |
|
"learning_rate": 8.979751067395534e-06, |
|
"loss": 0.132, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8600682593856656, |
|
"grad_norm": 0.5183162689208984, |
|
"learning_rate": 8.958941223943292e-06, |
|
"loss": 0.1517, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8659190638712823, |
|
"grad_norm": 0.41768166422843933, |
|
"learning_rate": 8.937945919860086e-06, |
|
"loss": 0.1413, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8717698683568991, |
|
"grad_norm": 0.48344236612319946, |
|
"learning_rate": 8.916766138692303e-06, |
|
"loss": 0.1399, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.8776206728425159, |
|
"grad_norm": 0.4375057518482208, |
|
"learning_rate": 8.895402872628352e-06, |
|
"loss": 0.1326, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8834714773281326, |
|
"grad_norm": 0.3658796548843384, |
|
"learning_rate": 8.873857122452174e-06, |
|
"loss": 0.1215, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8893222818137494, |
|
"grad_norm": 0.4298015236854553, |
|
"learning_rate": 8.852129897496367e-06, |
|
"loss": 0.1347, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8951730862993662, |
|
"grad_norm": 0.39684000611305237, |
|
"learning_rate": 8.83022221559489e-06, |
|
"loss": 0.143, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.9010238907849829, |
|
"grad_norm": 0.4388231933116913, |
|
"learning_rate": 8.808135103035407e-06, |
|
"loss": 0.13, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.9068746952705997, |
|
"grad_norm": 0.47847867012023926, |
|
"learning_rate": 8.785869594511182e-06, |
|
"loss": 0.1443, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9127254997562165, |
|
"grad_norm": 0.4175593852996826, |
|
"learning_rate": 8.763426733072624e-06, |
|
"loss": 0.1375, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.9185763042418332, |
|
"grad_norm": 0.4408848285675049, |
|
"learning_rate": 8.740807570078419e-06, |
|
"loss": 0.149, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.92442710872745, |
|
"grad_norm": 0.41366511583328247, |
|
"learning_rate": 8.718013165146275e-06, |
|
"loss": 0.1282, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.9302779132130667, |
|
"grad_norm": 0.40209296345710754, |
|
"learning_rate": 8.695044586103297e-06, |
|
"loss": 0.1328, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9361287176986836, |
|
"grad_norm": 0.39008352160453796, |
|
"learning_rate": 8.671902908935942e-06, |
|
"loss": 0.1263, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9419795221843004, |
|
"grad_norm": 0.741543173789978, |
|
"learning_rate": 8.648589217739635e-06, |
|
"loss": 0.1348, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.9478303266699171, |
|
"grad_norm": 0.3927006125450134, |
|
"learning_rate": 8.625104604667965e-06, |
|
"loss": 0.1448, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9536811311555339, |
|
"grad_norm": 0.3813360035419464, |
|
"learning_rate": 8.601450169881533e-06, |
|
"loss": 0.1367, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.9595319356411507, |
|
"grad_norm": 0.41232413053512573, |
|
"learning_rate": 8.577627021496413e-06, |
|
"loss": 0.1333, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.9653827401267674, |
|
"grad_norm": 0.4455775320529938, |
|
"learning_rate": 8.553636275532236e-06, |
|
"loss": 0.1431, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9712335446123842, |
|
"grad_norm": 0.4114275574684143, |
|
"learning_rate": 8.529479055859918e-06, |
|
"loss": 0.1262, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.977084349098001, |
|
"grad_norm": 0.4263303279876709, |
|
"learning_rate": 8.505156494148997e-06, |
|
"loss": 0.1416, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9829351535836177, |
|
"grad_norm": 0.38404276967048645, |
|
"learning_rate": 8.480669729814635e-06, |
|
"loss": 0.1404, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9887859580692345, |
|
"grad_norm": 0.3970484733581543, |
|
"learning_rate": 8.456019909964224e-06, |
|
"loss": 0.1311, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9946367625548513, |
|
"grad_norm": 0.41995933651924133, |
|
"learning_rate": 8.43120818934367e-06, |
|
"loss": 0.1358, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0014627011214041, |
|
"grad_norm": 0.47906413674354553, |
|
"learning_rate": 8.40623573028327e-06, |
|
"loss": 0.1602, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.007313505607021, |
|
"grad_norm": 0.3676239550113678, |
|
"learning_rate": 8.381103702643295e-06, |
|
"loss": 0.09, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0131643100926377, |
|
"grad_norm": 0.3158869445323944, |
|
"learning_rate": 8.35581328375915e-06, |
|
"loss": 0.0749, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.0190151145782544, |
|
"grad_norm": 0.3189470171928406, |
|
"learning_rate": 8.330365658386252e-06, |
|
"loss": 0.085, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.0248659190638714, |
|
"grad_norm": 0.2992206811904907, |
|
"learning_rate": 8.30476201864451e-06, |
|
"loss": 0.0758, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.030716723549488, |
|
"grad_norm": 0.36117929220199585, |
|
"learning_rate": 8.27900356396249e-06, |
|
"loss": 0.0737, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.0365675280351048, |
|
"grad_norm": 0.4092389643192291, |
|
"learning_rate": 8.25309150102121e-06, |
|
"loss": 0.0874, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0424183325207217, |
|
"grad_norm": 0.42305558919906616, |
|
"learning_rate": 8.227027043697642e-06, |
|
"loss": 0.0988, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0482691370063384, |
|
"grad_norm": 0.34523454308509827, |
|
"learning_rate": 8.200811413007808e-06, |
|
"loss": 0.0851, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.054119941491955, |
|
"grad_norm": 0.389148086309433, |
|
"learning_rate": 8.174445837049614e-06, |
|
"loss": 0.0823, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.059970745977572, |
|
"grad_norm": 0.33525988459587097, |
|
"learning_rate": 8.147931550945301e-06, |
|
"loss": 0.0775, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.0658215504631887, |
|
"grad_norm": 0.3539344370365143, |
|
"learning_rate": 8.121269796783585e-06, |
|
"loss": 0.0869, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.0716723549488054, |
|
"grad_norm": 0.32239681482315063, |
|
"learning_rate": 8.094461823561473e-06, |
|
"loss": 0.0769, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.0775231594344223, |
|
"grad_norm": 0.3420362174510956, |
|
"learning_rate": 8.06750888712576e-06, |
|
"loss": 0.091, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.083373963920039, |
|
"grad_norm": 0.34657928347587585, |
|
"learning_rate": 8.040412250114184e-06, |
|
"loss": 0.0752, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0892247684056557, |
|
"grad_norm": 0.3150254786014557, |
|
"learning_rate": 8.013173181896283e-06, |
|
"loss": 0.0684, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0950755728912727, |
|
"grad_norm": 0.33900588750839233, |
|
"learning_rate": 7.985792958513932e-06, |
|
"loss": 0.0846, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.1009263773768894, |
|
"grad_norm": 0.35361960530281067, |
|
"learning_rate": 7.958272862621562e-06, |
|
"loss": 0.0879, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.106777181862506, |
|
"grad_norm": 0.3507941663265228, |
|
"learning_rate": 7.930614183426074e-06, |
|
"loss": 0.0773, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.1126279863481228, |
|
"grad_norm": 0.36692172288894653, |
|
"learning_rate": 7.902818216626446e-06, |
|
"loss": 0.0954, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1184787908337397, |
|
"grad_norm": 0.33450374007225037, |
|
"learning_rate": 7.874886264353035e-06, |
|
"loss": 0.0745, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.1243295953193564, |
|
"grad_norm": 0.3424067795276642, |
|
"learning_rate": 7.846819635106569e-06, |
|
"loss": 0.0838, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.130180399804973, |
|
"grad_norm": 0.33934175968170166, |
|
"learning_rate": 7.818619643696863e-06, |
|
"loss": 0.0769, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.13603120429059, |
|
"grad_norm": 0.3160100281238556, |
|
"learning_rate": 7.790287611181217e-06, |
|
"loss": 0.0811, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.1418820087762067, |
|
"grad_norm": 0.2945168912410736, |
|
"learning_rate": 7.76182486480253e-06, |
|
"loss": 0.0794, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1477328132618234, |
|
"grad_norm": 0.34128355979919434, |
|
"learning_rate": 7.733232737927123e-06, |
|
"loss": 0.0753, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.1535836177474403, |
|
"grad_norm": 0.3292410671710968, |
|
"learning_rate": 7.70451256998228e-06, |
|
"loss": 0.0807, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.159434422233057, |
|
"grad_norm": 0.30368009209632874, |
|
"learning_rate": 7.675665706393502e-06, |
|
"loss": 0.0774, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1652852267186737, |
|
"grad_norm": 0.3382299542427063, |
|
"learning_rate": 7.646693498521472e-06, |
|
"loss": 0.0801, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.1711360312042907, |
|
"grad_norm": 0.34679096937179565, |
|
"learning_rate": 7.617597303598754e-06, |
|
"loss": 0.0828, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1769868356899074, |
|
"grad_norm": 0.3604985773563385, |
|
"learning_rate": 7.588378484666214e-06, |
|
"loss": 0.0857, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.182837640175524, |
|
"grad_norm": 0.3484993278980255, |
|
"learning_rate": 7.559038410509161e-06, |
|
"loss": 0.0853, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.188688444661141, |
|
"grad_norm": 0.3147986829280853, |
|
"learning_rate": 7.529578455593232e-06, |
|
"loss": 0.0833, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.1945392491467577, |
|
"grad_norm": 0.3428245782852173, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0829, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.2003900536323744, |
|
"grad_norm": 0.31704169511795044, |
|
"learning_rate": 7.47030442936232e-06, |
|
"loss": 0.0826, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2062408581179913, |
|
"grad_norm": 0.3161751329898834, |
|
"learning_rate": 7.440493134799425e-06, |
|
"loss": 0.0774, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.212091662603608, |
|
"grad_norm": 0.3535175919532776, |
|
"learning_rate": 7.4105675128517456e-06, |
|
"loss": 0.0728, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.2179424670892247, |
|
"grad_norm": 0.37694627046585083, |
|
"learning_rate": 7.380528965415501e-06, |
|
"loss": 0.1014, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.2237932715748416, |
|
"grad_norm": 0.36685872077941895, |
|
"learning_rate": 7.35037889967702e-06, |
|
"loss": 0.0885, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.2296440760604583, |
|
"grad_norm": 0.3464377522468567, |
|
"learning_rate": 7.320118728046818e-06, |
|
"loss": 0.0875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.235494880546075, |
|
"grad_norm": 0.30345961451530457, |
|
"learning_rate": 7.289749868093432e-06, |
|
"loss": 0.0784, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.2413456850316917, |
|
"grad_norm": 0.34137392044067383, |
|
"learning_rate": 7.259273742477017e-06, |
|
"loss": 0.0741, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.2471964895173087, |
|
"grad_norm": 0.3188891112804413, |
|
"learning_rate": 7.2286917788826926e-06, |
|
"loss": 0.0782, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2530472940029254, |
|
"grad_norm": 0.30841705203056335, |
|
"learning_rate": 7.19800540995367e-06, |
|
"loss": 0.0768, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.258898098488542, |
|
"grad_norm": 0.3702380657196045, |
|
"learning_rate": 7.167216073224136e-06, |
|
"loss": 0.0851, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.264748902974159, |
|
"grad_norm": 0.3240349590778351, |
|
"learning_rate": 7.136325211051905e-06, |
|
"loss": 0.0756, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2705997074597757, |
|
"grad_norm": 0.3365533649921417, |
|
"learning_rate": 7.1053342705508564e-06, |
|
"loss": 0.0835, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.2764505119453924, |
|
"grad_norm": 0.2976110577583313, |
|
"learning_rate": 7.074244703523137e-06, |
|
"loss": 0.0667, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.2823013164310093, |
|
"grad_norm": 0.3380391597747803, |
|
"learning_rate": 7.043057966391158e-06, |
|
"loss": 0.0822, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.288152120916626, |
|
"grad_norm": 0.3523262143135071, |
|
"learning_rate": 7.011775520129363e-06, |
|
"loss": 0.0776, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2940029254022427, |
|
"grad_norm": 0.29708200693130493, |
|
"learning_rate": 6.980398830195785e-06, |
|
"loss": 0.0809, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2998537298878596, |
|
"grad_norm": 0.3353002369403839, |
|
"learning_rate": 6.948929366463397e-06, |
|
"loss": 0.0698, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.3057045343734763, |
|
"grad_norm": 0.34924203157424927, |
|
"learning_rate": 6.9173686031512595e-06, |
|
"loss": 0.0923, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.311555338859093, |
|
"grad_norm": 0.31901872158050537, |
|
"learning_rate": 6.885718018755448e-06, |
|
"loss": 0.0806, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.31740614334471, |
|
"grad_norm": 0.33020636439323425, |
|
"learning_rate": 6.8539790959798045e-06, |
|
"loss": 0.0778, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3232569478303267, |
|
"grad_norm": 0.35056766867637634, |
|
"learning_rate": 6.822153321666469e-06, |
|
"loss": 0.0827, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.3291077523159434, |
|
"grad_norm": 0.34175387024879456, |
|
"learning_rate": 6.790242186726231e-06, |
|
"loss": 0.082, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.3349585568015603, |
|
"grad_norm": 0.34124261140823364, |
|
"learning_rate": 6.758247186068684e-06, |
|
"loss": 0.084, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.340809361287177, |
|
"grad_norm": 0.3028365969657898, |
|
"learning_rate": 6.7261698185322e-06, |
|
"loss": 0.0768, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.3466601657727937, |
|
"grad_norm": 0.3365366756916046, |
|
"learning_rate": 6.6940115868137065e-06, |
|
"loss": 0.076, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3525109702584106, |
|
"grad_norm": 0.30919474363327026, |
|
"learning_rate": 6.6617739973982985e-06, |
|
"loss": 0.0789, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.3583617747440273, |
|
"grad_norm": 0.3369212746620178, |
|
"learning_rate": 6.629458560488664e-06, |
|
"loss": 0.0945, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.364212579229644, |
|
"grad_norm": 0.3106522560119629, |
|
"learning_rate": 6.597066789934336e-06, |
|
"loss": 0.081, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.370063383715261, |
|
"grad_norm": 0.36917534470558167, |
|
"learning_rate": 6.5646002031607726e-06, |
|
"loss": 0.0842, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.3759141882008776, |
|
"grad_norm": 0.33988648653030396, |
|
"learning_rate": 6.5320603210982745e-06, |
|
"loss": 0.0859, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.3817649926864943, |
|
"grad_norm": 0.34712544083595276, |
|
"learning_rate": 6.499448668110735e-06, |
|
"loss": 0.0884, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.3876157971721113, |
|
"grad_norm": 0.3763306140899658, |
|
"learning_rate": 6.466766771924231e-06, |
|
"loss": 0.0861, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.393466601657728, |
|
"grad_norm": 0.33518311381340027, |
|
"learning_rate": 6.434016163555452e-06, |
|
"loss": 0.0798, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.3993174061433447, |
|
"grad_norm": 0.31457552313804626, |
|
"learning_rate": 6.401198377239979e-06, |
|
"loss": 0.0788, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.4051682106289616, |
|
"grad_norm": 0.3552372455596924, |
|
"learning_rate": 6.368314950360416e-06, |
|
"loss": 0.0806, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4110190151145783, |
|
"grad_norm": 0.32624173164367676, |
|
"learning_rate": 6.3353674233743585e-06, |
|
"loss": 0.0852, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.416869819600195, |
|
"grad_norm": 0.36660850048065186, |
|
"learning_rate": 6.302357339742245e-06, |
|
"loss": 0.0887, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.422720624085812, |
|
"grad_norm": 0.30113551020622253, |
|
"learning_rate": 6.269286245855039e-06, |
|
"loss": 0.0733, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.3478698134422302, |
|
"learning_rate": 6.236155690961795e-06, |
|
"loss": 0.0733, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.4344222330570453, |
|
"grad_norm": 0.346223384141922, |
|
"learning_rate": 6.202967227097073e-06, |
|
"loss": 0.0892, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4402730375426622, |
|
"grad_norm": 0.3341302275657654, |
|
"learning_rate": 6.169722409008244e-06, |
|
"loss": 0.0797, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.446123842028279, |
|
"grad_norm": 0.3604235351085663, |
|
"learning_rate": 6.136422794082645e-06, |
|
"loss": 0.0697, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.4519746465138956, |
|
"grad_norm": 0.3553491234779358, |
|
"learning_rate": 6.10306994227463e-06, |
|
"loss": 0.0789, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.4578254509995126, |
|
"grad_norm": 0.358963280916214, |
|
"learning_rate": 6.0696654160324875e-06, |
|
"loss": 0.0809, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4636762554851293, |
|
"grad_norm": 0.3794144093990326, |
|
"learning_rate": 6.0362107802252486e-06, |
|
"loss": 0.0817, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.469527059970746, |
|
"grad_norm": 0.3239617943763733, |
|
"learning_rate": 6.002707602069377e-06, |
|
"loss": 0.0837, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.4753778644563629, |
|
"grad_norm": 0.3182874321937561, |
|
"learning_rate": 5.9691574510553505e-06, |
|
"loss": 0.0877, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.4812286689419796, |
|
"grad_norm": 0.3259419798851013, |
|
"learning_rate": 5.935561898874142e-06, |
|
"loss": 0.0846, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.4870794734275963, |
|
"grad_norm": 0.3229186236858368, |
|
"learning_rate": 5.901922519343586e-06, |
|
"loss": 0.0785, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.4929302779132132, |
|
"grad_norm": 0.3261856734752655, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"loss": 0.0893, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.49878108239883, |
|
"grad_norm": 0.3506629168987274, |
|
"learning_rate": 5.834518583697628e-06, |
|
"loss": 0.0953, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.5046318868844466, |
|
"grad_norm": 0.3554547131061554, |
|
"learning_rate": 5.800757185188195e-06, |
|
"loss": 0.0806, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.5104826913700635, |
|
"grad_norm": 0.3573915958404541, |
|
"learning_rate": 5.766958274393428e-06, |
|
"loss": 0.0875, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.51633349585568, |
|
"grad_norm": 0.34284883737564087, |
|
"learning_rate": 5.733123434657704e-06, |
|
"loss": 0.0906, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.522184300341297, |
|
"grad_norm": 0.3178277015686035, |
|
"learning_rate": 5.699254251008524e-06, |
|
"loss": 0.0731, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5280351048269138, |
|
"grad_norm": 0.40219858288764954, |
|
"learning_rate": 5.66535231008227e-06, |
|
"loss": 0.0817, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.5338859093125303, |
|
"grad_norm": 0.35268712043762207, |
|
"learning_rate": 5.631419200049867e-06, |
|
"loss": 0.0795, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.5397367137981472, |
|
"grad_norm": 0.3267902433872223, |
|
"learning_rate": 5.597456510542395e-06, |
|
"loss": 0.0688, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.5455875182837642, |
|
"grad_norm": 0.3811274766921997, |
|
"learning_rate": 5.5634658325766066e-06, |
|
"loss": 0.0812, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.5514383227693807, |
|
"grad_norm": 0.3165196180343628, |
|
"learning_rate": 5.529448758480408e-06, |
|
"loss": 0.0693, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5572891272549976, |
|
"grad_norm": 0.3585417866706848, |
|
"learning_rate": 5.495406881818256e-06, |
|
"loss": 0.0853, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.5631399317406145, |
|
"grad_norm": 0.30655720829963684, |
|
"learning_rate": 5.46134179731651e-06, |
|
"loss": 0.0847, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.568990736226231, |
|
"grad_norm": 0.2977589964866638, |
|
"learning_rate": 5.427255100788726e-06, |
|
"loss": 0.0801, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.574841540711848, |
|
"grad_norm": 0.3374764323234558, |
|
"learning_rate": 5.393148389060893e-06, |
|
"loss": 0.0761, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.5806923451974646, |
|
"grad_norm": 0.33393600583076477, |
|
"learning_rate": 5.359023259896638e-06, |
|
"loss": 0.0827, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5865431496830813, |
|
"grad_norm": 0.35629692673683167, |
|
"learning_rate": 5.3248813119223665e-06, |
|
"loss": 0.0833, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.5923939541686982, |
|
"grad_norm": 0.3579968810081482, |
|
"learning_rate": 5.290724144552379e-06, |
|
"loss": 0.0883, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.598244758654315, |
|
"grad_norm": 0.33415958285331726, |
|
"learning_rate": 5.2565533579139484e-06, |
|
"loss": 0.0811, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.6040955631399316, |
|
"grad_norm": 0.31935596466064453, |
|
"learning_rate": 5.222370552772353e-06, |
|
"loss": 0.0852, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.6099463676255485, |
|
"grad_norm": 0.35256752371788025, |
|
"learning_rate": 5.188177330455886e-06, |
|
"loss": 0.0913, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.6157971721111652, |
|
"grad_norm": 0.31540942192077637, |
|
"learning_rate": 5.153975292780852e-06, |
|
"loss": 0.0789, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.621647976596782, |
|
"grad_norm": 0.3884231448173523, |
|
"learning_rate": 5.119766041976516e-06, |
|
"loss": 0.0784, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.6274987810823989, |
|
"grad_norm": 0.30289870500564575, |
|
"learning_rate": 5.085551180610046e-06, |
|
"loss": 0.073, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.6333495855680156, |
|
"grad_norm": 0.3469166159629822, |
|
"learning_rate": 5.05133231151145e-06, |
|
"loss": 0.0781, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.6392003900536323, |
|
"grad_norm": 0.3332676887512207, |
|
"learning_rate": 5.017111037698477e-06, |
|
"loss": 0.0837, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6450511945392492, |
|
"grad_norm": 0.3515254557132721, |
|
"learning_rate": 4.9828889623015265e-06, |
|
"loss": 0.0945, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.650901999024866, |
|
"grad_norm": 0.3309212923049927, |
|
"learning_rate": 4.948667688488552e-06, |
|
"loss": 0.0773, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.6567528035104826, |
|
"grad_norm": 0.3406410217285156, |
|
"learning_rate": 4.9144488193899546e-06, |
|
"loss": 0.0742, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.6626036079960995, |
|
"grad_norm": 0.3194100260734558, |
|
"learning_rate": 4.880233958023486e-06, |
|
"loss": 0.0768, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.6684544124817162, |
|
"grad_norm": 0.34699711203575134, |
|
"learning_rate": 4.846024707219149e-06, |
|
"loss": 0.0853, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.674305216967333, |
|
"grad_norm": 0.3401515781879425, |
|
"learning_rate": 4.811822669544115e-06, |
|
"loss": 0.0755, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.6801560214529498, |
|
"grad_norm": 0.3512323498725891, |
|
"learning_rate": 4.777629447227649e-06, |
|
"loss": 0.0928, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.6860068259385665, |
|
"grad_norm": 0.3097745478153229, |
|
"learning_rate": 4.7434466420860515e-06, |
|
"loss": 0.0727, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.6918576304241832, |
|
"grad_norm": 0.2933128774166107, |
|
"learning_rate": 4.7092758554476215e-06, |
|
"loss": 0.0729, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.6977084349098002, |
|
"grad_norm": 0.35171079635620117, |
|
"learning_rate": 4.675118688077634e-06, |
|
"loss": 0.081, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7035592393954169, |
|
"grad_norm": 0.3128647804260254, |
|
"learning_rate": 4.640976740103363e-06, |
|
"loss": 0.0783, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.7094100438810336, |
|
"grad_norm": 0.35127922892570496, |
|
"learning_rate": 4.606851610939108e-06, |
|
"loss": 0.0812, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.7152608483666505, |
|
"grad_norm": 0.30880433320999146, |
|
"learning_rate": 4.572744899211275e-06, |
|
"loss": 0.0819, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.7211116528522672, |
|
"grad_norm": 0.32968375086784363, |
|
"learning_rate": 4.53865820268349e-06, |
|
"loss": 0.0816, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.726962457337884, |
|
"grad_norm": 0.3447258174419403, |
|
"learning_rate": 4.504593118181745e-06, |
|
"loss": 0.0712, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.7328132618235008, |
|
"grad_norm": 0.31902605295181274, |
|
"learning_rate": 4.470551241519594e-06, |
|
"loss": 0.0861, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.7386640663091175, |
|
"grad_norm": 0.34594473242759705, |
|
"learning_rate": 4.436534167423395e-06, |
|
"loss": 0.0767, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.7445148707947342, |
|
"grad_norm": 0.341180682182312, |
|
"learning_rate": 4.402543489457607e-06, |
|
"loss": 0.0841, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.7503656752803511, |
|
"grad_norm": 0.30309170484542847, |
|
"learning_rate": 4.368580799950133e-06, |
|
"loss": 0.081, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.7562164797659678, |
|
"grad_norm": 0.2895878553390503, |
|
"learning_rate": 4.334647689917734e-06, |
|
"loss": 0.0667, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7620672842515845, |
|
"grad_norm": 0.33328762650489807, |
|
"learning_rate": 4.300745748991478e-06, |
|
"loss": 0.0841, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.7679180887372015, |
|
"grad_norm": 0.34671878814697266, |
|
"learning_rate": 4.266876565342298e-06, |
|
"loss": 0.0768, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.7737688932228182, |
|
"grad_norm": 0.3152706027030945, |
|
"learning_rate": 4.233041725606573e-06, |
|
"loss": 0.0816, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7796196977084349, |
|
"grad_norm": 0.32463833689689636, |
|
"learning_rate": 4.199242814811807e-06, |
|
"loss": 0.0805, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.7854705021940518, |
|
"grad_norm": 0.33821901679039, |
|
"learning_rate": 4.1654814163023735e-06, |
|
"loss": 0.075, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.7913213066796685, |
|
"grad_norm": 0.31825628876686096, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.0769, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.7971721111652852, |
|
"grad_norm": 0.31411388516426086, |
|
"learning_rate": 4.098077480656415e-06, |
|
"loss": 0.0788, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.8030229156509021, |
|
"grad_norm": 0.31883758306503296, |
|
"learning_rate": 4.064438101125859e-06, |
|
"loss": 0.0741, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.8088737201365188, |
|
"grad_norm": 0.383833646774292, |
|
"learning_rate": 4.03084254894465e-06, |
|
"loss": 0.0888, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.8147245246221355, |
|
"grad_norm": 0.3364141285419464, |
|
"learning_rate": 3.997292397930624e-06, |
|
"loss": 0.0689, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.8205753291077524, |
|
"grad_norm": 0.31626763939857483, |
|
"learning_rate": 3.963789219774753e-06, |
|
"loss": 0.0741, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.8264261335933691, |
|
"grad_norm": 0.31046804785728455, |
|
"learning_rate": 3.930334583967514e-06, |
|
"loss": 0.0787, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.8322769380789858, |
|
"grad_norm": 0.3264026641845703, |
|
"learning_rate": 3.896930057725372e-06, |
|
"loss": 0.078, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.8381277425646028, |
|
"grad_norm": 0.32011500000953674, |
|
"learning_rate": 3.863577205917356e-06, |
|
"loss": 0.0757, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.8439785470502192, |
|
"grad_norm": 0.3323825001716614, |
|
"learning_rate": 3.8302775909917585e-06, |
|
"loss": 0.0831, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.8498293515358362, |
|
"grad_norm": 0.3106033504009247, |
|
"learning_rate": 3.7970327729029288e-06, |
|
"loss": 0.0742, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.855680156021453, |
|
"grad_norm": 0.3245903551578522, |
|
"learning_rate": 3.7638443090382067e-06, |
|
"loss": 0.0786, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.8615309605070696, |
|
"grad_norm": 0.31213557720184326, |
|
"learning_rate": 3.730713754144961e-06, |
|
"loss": 0.0691, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.8673817649926865, |
|
"grad_norm": 0.33322617411613464, |
|
"learning_rate": 3.6976426602577565e-06, |
|
"loss": 0.087, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.8732325694783034, |
|
"grad_norm": 0.32928356528282166, |
|
"learning_rate": 3.6646325766256423e-06, |
|
"loss": 0.0692, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.87908337396392, |
|
"grad_norm": 0.30873045325279236, |
|
"learning_rate": 3.6316850496395863e-06, |
|
"loss": 0.0729, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.8849341784495368, |
|
"grad_norm": 0.3300439715385437, |
|
"learning_rate": 3.598801622760021e-06, |
|
"loss": 0.0712, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.8907849829351537, |
|
"grad_norm": 0.3383248746395111, |
|
"learning_rate": 3.5659838364445505e-06, |
|
"loss": 0.0658, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.8966357874207702, |
|
"grad_norm": 0.3341819643974304, |
|
"learning_rate": 3.5332332280757706e-06, |
|
"loss": 0.081, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.9024865919063871, |
|
"grad_norm": 0.32250940799713135, |
|
"learning_rate": 3.5005513318892666e-06, |
|
"loss": 0.0796, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.908337396392004, |
|
"grad_norm": 0.4472445547580719, |
|
"learning_rate": 3.4679396789017263e-06, |
|
"loss": 0.0882, |
|
"step": 326 |
|
}, |
|
    {
      "epoch": 1.9141882008776205,
      "grad_norm": 0.31572669744491577,
      "learning_rate": 3.4353997968392295e-06,
      "loss": 0.0656,
      "step": 327
    },
    {
      "epoch": 1.9200390053632375,
      "grad_norm": 0.3064400255680084,
      "learning_rate": 3.402933210065665e-06,
      "loss": 0.0796,
      "step": 328
    },
    {
      "epoch": 1.9258898098488544,
      "grad_norm": 0.3014156222343445,
      "learning_rate": 3.3705414395113354e-06,
      "loss": 0.0763,
      "step": 329
    },
    {
      "epoch": 1.9317406143344709,
      "grad_norm": 0.3247920870780945,
      "learning_rate": 3.3382260026017027e-06,
      "loss": 0.0751,
      "step": 330
    },
    {
      "epoch": 1.9375914188200878,
      "grad_norm": 0.32763242721557617,
      "learning_rate": 3.305988413186295e-06,
      "loss": 0.0684,
      "step": 331
    },
    {
      "epoch": 1.9434422233057045,
      "grad_norm": 0.3187495470046997,
      "learning_rate": 3.2738301814678015e-06,
      "loss": 0.0643,
      "step": 332
    },
    {
      "epoch": 1.9492930277913212,
      "grad_norm": 0.33601319789886475,
      "learning_rate": 3.241752813931316e-06,
      "loss": 0.0842,
      "step": 333
    },
    {
      "epoch": 1.955143832276938,
      "grad_norm": 0.3197375237941742,
      "learning_rate": 3.2097578132737716e-06,
      "loss": 0.0655,
      "step": 334
    },
    {
      "epoch": 1.9609946367625548,
      "grad_norm": 0.35822373628616333,
      "learning_rate": 3.1778466783335328e-06,
      "loss": 0.0921,
      "step": 335
    },
    {
      "epoch": 1.9668454412481715,
      "grad_norm": 0.31587547063827515,
      "learning_rate": 3.1460209040201967e-06,
      "loss": 0.0795,
      "step": 336
    },
    {
      "epoch": 1.9726962457337884,
      "grad_norm": 0.3147113621234894,
      "learning_rate": 3.114281981244553e-06,
      "loss": 0.0729,
      "step": 337
    },
    {
      "epoch": 1.9785470502194051,
      "grad_norm": 0.33007726073265076,
      "learning_rate": 3.082631396848743e-06,
      "loss": 0.0802,
      "step": 338
    },
    {
      "epoch": 1.9843978547050218,
      "grad_norm": 0.3043513894081116,
      "learning_rate": 3.0510706335366034e-06,
      "loss": 0.0746,
      "step": 339
    },
    {
      "epoch": 1.9902486591906388,
      "grad_norm": 0.3211682438850403,
      "learning_rate": 3.019601169804216e-06,
      "loss": 0.078,
      "step": 340
    },
    {
      "epoch": 1.9960994636762555,
      "grad_norm": 0.3464657962322235,
      "learning_rate": 2.9882244798706372e-06,
      "loss": 0.0711,
      "step": 341
    },
    {
      "epoch": 2.0029254022428082,
      "grad_norm": 0.306328684091568,
      "learning_rate": 2.956942033608843e-06,
      "loss": 0.0624,
      "step": 342
    },
    {
      "epoch": 2.008776206728425,
      "grad_norm": 0.24574756622314453,
      "learning_rate": 2.9257552964768644e-06,
      "loss": 0.0475,
      "step": 343
    },
    {
      "epoch": 2.014627011214042,
      "grad_norm": 0.2128523290157318,
      "learning_rate": 2.8946657294491452e-06,
      "loss": 0.0607,
      "step": 344
    },
    {
      "epoch": 2.0204778156996586,
      "grad_norm": 0.19593936204910278,
      "learning_rate": 2.863674788948097e-06,
      "loss": 0.0443,
      "step": 345
    },
    {
      "epoch": 2.0263286201852755,
      "grad_norm": 0.2186254858970642,
      "learning_rate": 2.832783926775865e-06,
      "loss": 0.0546,
      "step": 346
    },
    {
      "epoch": 2.0321794246708924,
      "grad_norm": 0.2001432478427887,
      "learning_rate": 2.8019945900463307e-06,
      "loss": 0.0463,
      "step": 347
    },
    {
      "epoch": 2.038030229156509,
      "grad_norm": 0.2021595686674118,
      "learning_rate": 2.771308221117309e-06,
      "loss": 0.0467,
      "step": 348
    },
    {
      "epoch": 2.043881033642126,
      "grad_norm": 0.21800445020198822,
      "learning_rate": 2.740726257522987e-06,
      "loss": 0.0537,
      "step": 349
    },
    {
      "epoch": 2.0497318381277427,
      "grad_norm": 0.24673891067504883,
      "learning_rate": 2.7102501319065706e-06,
      "loss": 0.0524,
      "step": 350
    },
    {
      "epoch": 2.055582642613359,
      "grad_norm": 0.2493050992488861,
      "learning_rate": 2.6798812719531843e-06,
      "loss": 0.0566,
      "step": 351
    },
    {
      "epoch": 2.061433447098976,
      "grad_norm": 0.2142155021429062,
      "learning_rate": 2.6496211003229795e-06,
      "loss": 0.0464,
      "step": 352
    },
    {
      "epoch": 2.067284251584593,
      "grad_norm": 0.22218072414398193,
      "learning_rate": 2.6194710345845e-06,
      "loss": 0.0501,
      "step": 353
    },
    {
      "epoch": 2.0731350560702095,
      "grad_norm": 0.22646678984165192,
      "learning_rate": 2.5894324871482557e-06,
      "loss": 0.0462,
      "step": 354
    },
    {
      "epoch": 2.0789858605558265,
      "grad_norm": 0.21073314547538757,
      "learning_rate": 2.559506865200576e-06,
      "loss": 0.0439,
      "step": 355
    },
    {
      "epoch": 2.0848366650414434,
      "grad_norm": 0.19907771050930023,
      "learning_rate": 2.529695570637679e-06,
      "loss": 0.0458,
      "step": 356
    },
    {
      "epoch": 2.09068746952706,
      "grad_norm": 0.21957091987133026,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.0552,
      "step": 357
    },
    {
      "epoch": 2.096538274012677,
      "grad_norm": 0.19745470583438873,
      "learning_rate": 2.4704215444067684e-06,
      "loss": 0.0412,
      "step": 358
    },
    {
      "epoch": 2.1023890784982937,
      "grad_norm": 0.23495222628116608,
      "learning_rate": 2.4409615894908407e-06,
      "loss": 0.0521,
      "step": 359
    },
    {
      "epoch": 2.10823988298391,
      "grad_norm": 0.23315241932868958,
      "learning_rate": 2.411621515333788e-06,
      "loss": 0.0519,
      "step": 360
    },
    {
      "epoch": 2.114090687469527,
      "grad_norm": 0.23338022828102112,
      "learning_rate": 2.3824026964012487e-06,
      "loss": 0.0523,
      "step": 361
    },
    {
      "epoch": 2.119941491955144,
      "grad_norm": 0.21098627150058746,
      "learning_rate": 2.35330650147853e-06,
      "loss": 0.0552,
      "step": 362
    },
    {
      "epoch": 2.1257922964407605,
      "grad_norm": 0.20283359289169312,
      "learning_rate": 2.324334293606499e-06,
      "loss": 0.044,
      "step": 363
    },
    {
      "epoch": 2.1316431009263774,
      "grad_norm": 0.21483571827411652,
      "learning_rate": 2.2954874300177197e-06,
      "loss": 0.0433,
      "step": 364
    },
    {
      "epoch": 2.1374939054119944,
      "grad_norm": 0.2035175859928131,
      "learning_rate": 2.266767262072878e-06,
      "loss": 0.0472,
      "step": 365
    },
    {
      "epoch": 2.143344709897611,
      "grad_norm": 0.21213766932487488,
      "learning_rate": 2.238175135197471e-06,
      "loss": 0.053,
      "step": 366
    },
    {
      "epoch": 2.1491955143832278,
      "grad_norm": 0.2157718688249588,
      "learning_rate": 2.2097123888187825e-06,
      "loss": 0.05,
      "step": 367
    },
    {
      "epoch": 2.1550463188688447,
      "grad_norm": 0.19376371800899506,
      "learning_rate": 2.181380356303139e-06,
      "loss": 0.0397,
      "step": 368
    },
    {
      "epoch": 2.160897123354461,
      "grad_norm": 0.21883581578731537,
      "learning_rate": 2.1531803648934333e-06,
      "loss": 0.0446,
      "step": 369
    },
    {
      "epoch": 2.166747927840078,
      "grad_norm": 0.21257570385932922,
      "learning_rate": 2.1251137356469677e-06,
      "loss": 0.0476,
      "step": 370
    },
    {
      "epoch": 2.172598732325695,
      "grad_norm": 0.21544501185417175,
      "learning_rate": 2.0971817833735548e-06,
      "loss": 0.0499,
      "step": 371
    },
    {
      "epoch": 2.1784495368113115,
      "grad_norm": 0.18301881849765778,
      "learning_rate": 2.069385816573928e-06,
      "loss": 0.0392,
      "step": 372
    },
    {
      "epoch": 2.1843003412969284,
      "grad_norm": 0.22180932760238647,
      "learning_rate": 2.0417271373784403e-06,
      "loss": 0.0559,
      "step": 373
    },
    {
      "epoch": 2.1901511457825453,
      "grad_norm": 0.21485093235969543,
      "learning_rate": 2.0142070414860704e-06,
      "loss": 0.0444,
      "step": 374
    },
    {
      "epoch": 2.196001950268162,
      "grad_norm": 0.21097077429294586,
      "learning_rate": 1.9868268181037186e-06,
      "loss": 0.0479,
      "step": 375
    },
    {
      "epoch": 2.2018527547537787,
      "grad_norm": 0.2308683693408966,
      "learning_rate": 1.9595877498858175e-06,
      "loss": 0.0521,
      "step": 376
    },
    {
      "epoch": 2.2077035592393957,
      "grad_norm": 0.21699382364749908,
      "learning_rate": 1.9324911128742406e-06,
      "loss": 0.0479,
      "step": 377
    },
    {
      "epoch": 2.213554363725012,
      "grad_norm": 0.20137636363506317,
      "learning_rate": 1.9055381764385272e-06,
      "loss": 0.0507,
      "step": 378
    },
    {
      "epoch": 2.219405168210629,
      "grad_norm": 0.227940171957016,
      "learning_rate": 1.8787302032164168e-06,
      "loss": 0.0568,
      "step": 379
    },
    {
      "epoch": 2.2252559726962455,
      "grad_norm": 0.23801080882549286,
      "learning_rate": 1.8520684490547014e-06,
      "loss": 0.0582,
      "step": 380
    },
    {
      "epoch": 2.2311067771818625,
      "grad_norm": 0.24348334968090057,
      "learning_rate": 1.8255541629503865e-06,
      "loss": 0.0505,
      "step": 381
    },
    {
      "epoch": 2.2369575816674794,
      "grad_norm": 0.23667719960212708,
      "learning_rate": 1.7991885869921928e-06,
      "loss": 0.0423,
      "step": 382
    },
    {
      "epoch": 2.242808386153096,
      "grad_norm": 0.21500073373317719,
      "learning_rate": 1.7729729563023613e-06,
      "loss": 0.0478,
      "step": 383
    },
    {
      "epoch": 2.248659190638713,
      "grad_norm": 0.18407940864562988,
      "learning_rate": 1.746908498978791e-06,
      "loss": 0.0454,
      "step": 384
    },
    {
      "epoch": 2.2545099951243297,
      "grad_norm": 0.22536802291870117,
      "learning_rate": 1.7209964360375137e-06,
      "loss": 0.0474,
      "step": 385
    },
    {
      "epoch": 2.260360799609946,
      "grad_norm": 0.22089789807796478,
      "learning_rate": 1.6952379813554914e-06,
      "loss": 0.0407,
      "step": 386
    },
    {
      "epoch": 2.266211604095563,
      "grad_norm": 0.19465979933738708,
      "learning_rate": 1.6696343416137495e-06,
      "loss": 0.05,
      "step": 387
    },
    {
      "epoch": 2.27206240858118,
      "grad_norm": 0.2152717262506485,
      "learning_rate": 1.6441867162408514e-06,
      "loss": 0.0484,
      "step": 388
    },
    {
      "epoch": 2.2779132130667965,
      "grad_norm": 0.1996590495109558,
      "learning_rate": 1.6188962973567068e-06,
      "loss": 0.0463,
      "step": 389
    },
    {
      "epoch": 2.2837640175524134,
      "grad_norm": 0.22818255424499512,
      "learning_rate": 1.5937642697167288e-06,
      "loss": 0.0509,
      "step": 390
    },
    {
      "epoch": 2.2896148220380304,
      "grad_norm": 0.18348443508148193,
      "learning_rate": 1.5687918106563326e-06,
      "loss": 0.0469,
      "step": 391
    },
    {
      "epoch": 2.295465626523647,
      "grad_norm": 0.16341239213943481,
      "learning_rate": 1.5439800900357765e-06,
      "loss": 0.0408,
      "step": 392
    },
    {
      "epoch": 2.3013164310092638,
      "grad_norm": 0.22039690613746643,
      "learning_rate": 1.5193302701853674e-06,
      "loss": 0.0457,
      "step": 393
    },
    {
      "epoch": 2.3071672354948807,
      "grad_norm": 0.20820854604244232,
      "learning_rate": 1.4948435058510036e-06,
      "loss": 0.0404,
      "step": 394
    },
    {
      "epoch": 2.313018039980497,
      "grad_norm": 0.6139259338378906,
      "learning_rate": 1.4705209441400841e-06,
      "loss": 0.0516,
      "step": 395
    },
    {
      "epoch": 2.318868844466114,
      "grad_norm": 0.1655004769563675,
      "learning_rate": 1.4463637244677648e-06,
      "loss": 0.0419,
      "step": 396
    },
    {
      "epoch": 2.324719648951731,
      "grad_norm": 0.20899701118469238,
      "learning_rate": 1.422372978503589e-06,
      "loss": 0.0488,
      "step": 397
    },
    {
      "epoch": 2.3305704534373475,
      "grad_norm": 0.19723419845104218,
      "learning_rate": 1.3985498301184685e-06,
      "loss": 0.048,
      "step": 398
    },
    {
      "epoch": 2.3364212579229644,
      "grad_norm": 0.21295493841171265,
      "learning_rate": 1.374895395332037e-06,
      "loss": 0.0499,
      "step": 399
    },
    {
      "epoch": 2.3422720624085813,
      "grad_norm": 0.22390897572040558,
      "learning_rate": 1.351410782260366e-06,
      "loss": 0.0501,
      "step": 400
    },
    {
      "epoch": 2.348122866894198,
      "grad_norm": 0.22746333479881287,
      "learning_rate": 1.3280970910640573e-06,
      "loss": 0.0487,
      "step": 401
    },
    {
      "epoch": 2.3539736713798147,
      "grad_norm": 0.20877297222614288,
      "learning_rate": 1.3049554138967052e-06,
      "loss": 0.043,
      "step": 402
    },
    {
      "epoch": 2.3598244758654316,
      "grad_norm": 0.23040781915187836,
      "learning_rate": 1.2819868348537263e-06,
      "loss": 0.0521,
      "step": 403
    },
    {
      "epoch": 2.365675280351048,
      "grad_norm": 0.18759937584400177,
      "learning_rate": 1.259192429921584e-06,
      "loss": 0.0411,
      "step": 404
    },
    {
      "epoch": 2.371526084836665,
      "grad_norm": 0.21358315646648407,
      "learning_rate": 1.2365732669273778e-06,
      "loss": 0.0476,
      "step": 405
    },
    {
      "epoch": 2.377376889322282,
      "grad_norm": 0.20594115555286407,
      "learning_rate": 1.2141304054888204e-06,
      "loss": 0.0501,
      "step": 406
    },
    {
      "epoch": 2.3832276938078985,
      "grad_norm": 0.21114301681518555,
      "learning_rate": 1.1918648969645947e-06,
      "loss": 0.0427,
      "step": 407
    },
    {
      "epoch": 2.3890784982935154,
      "grad_norm": 0.19082777202129364,
      "learning_rate": 1.1697777844051105e-06,
      "loss": 0.0467,
      "step": 408
    },
    {
      "epoch": 2.3949293027791323,
      "grad_norm": 0.19006142020225525,
      "learning_rate": 1.1478701025036359e-06,
      "loss": 0.0429,
      "step": 409
    },
    {
      "epoch": 2.4007801072647488,
      "grad_norm": 0.22013723850250244,
      "learning_rate": 1.126142877547826e-06,
      "loss": 0.0481,
      "step": 410
    },
    {
      "epoch": 2.4066309117503657,
      "grad_norm": 0.22271747887134552,
      "learning_rate": 1.1045971273716476e-06,
      "loss": 0.0538,
      "step": 411
    },
    {
      "epoch": 2.4124817162359826,
      "grad_norm": 0.2010895311832428,
      "learning_rate": 1.083233861307697e-06,
      "loss": 0.0435,
      "step": 412
    },
    {
      "epoch": 2.418332520721599,
      "grad_norm": 0.2763868272304535,
      "learning_rate": 1.062054080139916e-06,
      "loss": 0.0498,
      "step": 413
    },
    {
      "epoch": 2.424183325207216,
      "grad_norm": 0.21242615580558777,
      "learning_rate": 1.0410587760567104e-06,
      "loss": 0.0533,
      "step": 414
    },
    {
      "epoch": 2.430034129692833,
      "grad_norm": 0.24994240701198578,
      "learning_rate": 1.0202489326044663e-06,
      "loss": 0.0443,
      "step": 415
    },
    {
      "epoch": 2.4358849341784494,
      "grad_norm": 0.26131340861320496,
      "learning_rate": 9.99625524641481e-07,
      "loss": 0.0465,
      "step": 416
    },
    {
      "epoch": 2.4417357386640663,
      "grad_norm": 0.21973882615566254,
      "learning_rate": 9.791895182922911e-07,
      "loss": 0.0525,
      "step": 417
    },
    {
      "epoch": 2.4475865431496833,
      "grad_norm": 0.21667824685573578,
      "learning_rate": 9.589418709024146e-07,
      "loss": 0.0435,
      "step": 418
    },
    {
      "epoch": 2.4534373476352997,
      "grad_norm": 0.22149190306663513,
      "learning_rate": 9.388835309934985e-07,
      "loss": 0.0531,
      "step": 419
    },
    {
      "epoch": 2.4592881521209167,
      "grad_norm": 0.2104058861732483,
      "learning_rate": 9.190154382188921e-07,
      "loss": 0.0518,
      "step": 420
    },
    {
      "epoch": 2.465138956606533,
      "grad_norm": 0.19894002377986908,
      "learning_rate": 8.993385233196223e-07,
      "loss": 0.0432,
      "step": 421
    },
    {
      "epoch": 2.47098976109215,
      "grad_norm": 0.2094515860080719,
      "learning_rate": 8.79853708080795e-07,
      "loss": 0.0528,
      "step": 422
    },
    {
      "epoch": 2.476840565577767,
      "grad_norm": 0.5233778357505798,
      "learning_rate": 8.605619052884106e-07,
      "loss": 0.0445,
      "step": 423
    },
    {
      "epoch": 2.4826913700633835,
      "grad_norm": 0.21081270277500153,
      "learning_rate": 8.414640186866063e-07,
      "loss": 0.0553,
      "step": 424
    },
    {
      "epoch": 2.4885421745490004,
      "grad_norm": 0.1904570311307907,
      "learning_rate": 8.225609429353187e-07,
      "loss": 0.0389,
      "step": 425
    },
    {
      "epoch": 2.4943929790346173,
      "grad_norm": 0.2278057038784027,
      "learning_rate": 8.03853563568367e-07,
      "loss": 0.0468,
      "step": 426
    },
    {
      "epoch": 2.500243783520234,
      "grad_norm": 0.23266924917697906,
      "learning_rate": 7.8534275695198e-07,
      "loss": 0.0545,
      "step": 427
    },
    {
      "epoch": 2.5060945880058507,
      "grad_norm": 0.23620003461837769,
      "learning_rate": 7.670293902437331e-07,
      "loss": 0.0502,
      "step": 428
    },
    {
      "epoch": 2.5119453924914676,
      "grad_norm": 0.2107357382774353,
      "learning_rate": 7.489143213519301e-07,
      "loss": 0.0502,
      "step": 429
    },
    {
      "epoch": 2.517796196977084,
      "grad_norm": 0.20279434323310852,
      "learning_rate": 7.309983988954078e-07,
      "loss": 0.0561,
      "step": 430
    },
    {
      "epoch": 2.523647001462701,
      "grad_norm": 0.19296404719352722,
      "learning_rate": 7.132824621637891e-07,
      "loss": 0.0427,
      "step": 431
    },
    {
      "epoch": 2.529497805948318,
      "grad_norm": 0.22379833459854126,
      "learning_rate": 6.957673410781617e-07,
      "loss": 0.0442,
      "step": 432
    },
    {
      "epoch": 2.5353486104339344,
      "grad_norm": 0.20516741275787354,
      "learning_rate": 6.784538561521986e-07,
      "loss": 0.043,
      "step": 433
    },
    {
      "epoch": 2.5411994149195514,
      "grad_norm": 0.20625388622283936,
      "learning_rate": 6.613428184537235e-07,
      "loss": 0.0555,
      "step": 434
    },
    {
      "epoch": 2.5470502194051683,
      "grad_norm": 0.1993888020515442,
      "learning_rate": 6.444350295667112e-07,
      "loss": 0.0462,
      "step": 435
    },
    {
      "epoch": 2.5529010238907848,
      "grad_norm": 0.20914866030216217,
      "learning_rate": 6.277312815537423e-07,
      "loss": 0.0449,
      "step": 436
    },
    {
      "epoch": 2.5587518283764017,
      "grad_norm": 0.20487205684185028,
      "learning_rate": 6.112323569188927e-07,
      "loss": 0.0442,
      "step": 437
    },
    {
      "epoch": 2.5646026328620186,
      "grad_norm": 0.2008737027645111,
      "learning_rate": 5.949390285710777e-07,
      "loss": 0.0454,
      "step": 438
    },
    {
      "epoch": 2.570453437347635,
      "grad_norm": 0.20572151243686676,
      "learning_rate": 5.788520597878477e-07,
      "loss": 0.044,
      "step": 439
    },
    {
      "epoch": 2.576304241833252,
      "grad_norm": 0.20191822946071625,
      "learning_rate": 5.629722041796292e-07,
      "loss": 0.0419,
      "step": 440
    },
    {
      "epoch": 2.582155046318869,
      "grad_norm": 0.21377748250961304,
      "learning_rate": 5.473002056544191e-07,
      "loss": 0.045,
      "step": 441
    },
    {
      "epoch": 2.5880058508044854,
      "grad_norm": 0.2058108150959015,
      "learning_rate": 5.318367983829393e-07,
      "loss": 0.0423,
      "step": 442
    },
    {
      "epoch": 2.5938566552901023,
      "grad_norm": 0.20492474734783173,
      "learning_rate": 5.165827067642415e-07,
      "loss": 0.0473,
      "step": 443
    },
    {
      "epoch": 2.5997074597757193,
      "grad_norm": 0.22236965596675873,
      "learning_rate": 5.015386453917742e-07,
      "loss": 0.04,
      "step": 444
    },
    {
      "epoch": 2.6055582642613357,
      "grad_norm": 0.20536741614341736,
      "learning_rate": 4.867053190199011e-07,
      "loss": 0.0464,
      "step": 445
    },
    {
      "epoch": 2.6114090687469527,
      "grad_norm": 0.2184278517961502,
      "learning_rate": 4.720834225308962e-07,
      "loss": 0.0508,
      "step": 446
    },
    {
      "epoch": 2.6172598732325696,
      "grad_norm": 0.19578547775745392,
      "learning_rate": 4.576736409023813e-07,
      "loss": 0.0492,
      "step": 447
    },
    {
      "epoch": 2.623110677718186,
      "grad_norm": 0.21041178703308105,
      "learning_rate": 4.4347664917524293e-07,
      "loss": 0.0467,
      "step": 448
    },
    {
      "epoch": 2.628961482203803,
      "grad_norm": 0.19261011481285095,
      "learning_rate": 4.29493112422007e-07,
      "loss": 0.0519,
      "step": 449
    },
    {
      "epoch": 2.63481228668942,
      "grad_norm": 0.1865607649087906,
      "learning_rate": 4.15723685715686e-07,
      "loss": 0.0452,
      "step": 450
    },
    {
      "epoch": 2.6406630911750364,
      "grad_norm": 0.2131820172071457,
      "learning_rate": 4.0216901409908695e-07,
      "loss": 0.0434,
      "step": 451
    },
    {
      "epoch": 2.6465138956606533,
      "grad_norm": 0.17361226677894592,
      "learning_rate": 3.8882973255459975e-07,
      "loss": 0.0373,
      "step": 452
    },
    {
      "epoch": 2.6523647001462702,
      "grad_norm": 0.2203158587217331,
      "learning_rate": 3.7570646597444196e-07,
      "loss": 0.0432,
      "step": 453
    },
    {
      "epoch": 2.6582155046318867,
      "grad_norm": 0.20222148299217224,
      "learning_rate": 3.627998291313939e-07,
      "loss": 0.0426,
      "step": 454
    },
    {
      "epoch": 2.6640663091175036,
      "grad_norm": 0.20766018331050873,
      "learning_rate": 3.5011042664999663e-07,
      "loss": 0.0543,
      "step": 455
    },
    {
      "epoch": 2.6699171136031206,
      "grad_norm": 0.22400252521038055,
      "learning_rate": 3.3763885297822153e-07,
      "loss": 0.0511,
      "step": 456
    },
    {
      "epoch": 2.675767918088737,
      "grad_norm": 0.18321222066879272,
      "learning_rate": 3.2538569235963216e-07,
      "loss": 0.0459,
      "step": 457
    },
    {
      "epoch": 2.681618722574354,
      "grad_norm": 0.18486328423023224,
      "learning_rate": 3.133515188060077e-07,
      "loss": 0.0429,
      "step": 458
    },
    {
      "epoch": 2.687469527059971,
      "grad_norm": 0.20323659479618073,
      "learning_rate": 3.015368960704584e-07,
      "loss": 0.0469,
      "step": 459
    },
    {
      "epoch": 2.6933203315455874,
      "grad_norm": 0.21503138542175293,
      "learning_rate": 2.899423776210092e-07,
      "loss": 0.0518,
      "step": 460
    },
    {
      "epoch": 2.6991711360312043,
      "grad_norm": 0.2213236689567566,
      "learning_rate": 2.785685066146776e-07,
      "loss": 0.0547,
      "step": 461
    },
    {
      "epoch": 2.705021940516821,
      "grad_norm": 0.20127376914024353,
      "learning_rate": 2.6741581587202747e-07,
      "loss": 0.0431,
      "step": 462
    },
    {
      "epoch": 2.7108727450024377,
      "grad_norm": 0.37414053082466125,
      "learning_rate": 2.5648482785220865e-07,
      "loss": 0.0541,
      "step": 463
    },
    {
      "epoch": 2.7167235494880546,
      "grad_norm": 0.21136891841888428,
      "learning_rate": 2.4577605462847764e-07,
      "loss": 0.0451,
      "step": 464
    },
    {
      "epoch": 2.7225743539736715,
      "grad_norm": 0.1917283684015274,
      "learning_rate": 2.3528999786421758e-07,
      "loss": 0.0432,
      "step": 465
    },
    {
      "epoch": 2.728425158459288,
      "grad_norm": 0.2059125304222107,
      "learning_rate": 2.25027148789429e-07,
      "loss": 0.0498,
      "step": 466
    },
    {
      "epoch": 2.734275962944905,
      "grad_norm": 0.22194840013980865,
      "learning_rate": 2.1498798817772281e-07,
      "loss": 0.0484,
      "step": 467
    },
    {
      "epoch": 2.740126767430522,
      "grad_norm": 0.2024865299463272,
      "learning_rate": 2.0517298632379445e-07,
      "loss": 0.0478,
      "step": 468
    },
    {
      "epoch": 2.7459775719161383,
      "grad_norm": 0.2053341269493103,
      "learning_rate": 1.9558260302139642e-07,
      "loss": 0.044,
      "step": 469
    },
    {
      "epoch": 2.7518283764017553,
      "grad_norm": 0.16805504262447357,
      "learning_rate": 1.8621728754179392e-07,
      "loss": 0.0435,
      "step": 470
    },
    {
      "epoch": 2.757679180887372,
      "grad_norm": 0.22590601444244385,
      "learning_rate": 1.770774786127244e-07,
      "loss": 0.0442,
      "step": 471
    },
    {
      "epoch": 2.7635299853729887,
      "grad_norm": 0.19346405565738678,
      "learning_rate": 1.6816360439783797e-07,
      "loss": 0.0454,
      "step": 472
    },
    {
      "epoch": 2.7693807898586056,
      "grad_norm": 0.2147025167942047,
      "learning_rate": 1.5947608247664558e-07,
      "loss": 0.0469,
      "step": 473
    },
    {
      "epoch": 2.7752315943442225,
      "grad_norm": 0.19654686748981476,
      "learning_rate": 1.510153198249531e-07,
      "loss": 0.046,
      "step": 474
    },
    {
      "epoch": 2.781082398829839,
      "grad_norm": 0.1899406611919403,
      "learning_rate": 1.4278171279579757e-07,
      "loss": 0.0422,
      "step": 475
    },
    {
      "epoch": 2.786933203315456,
      "grad_norm": 0.20060855150222778,
      "learning_rate": 1.3477564710088097e-07,
      "loss": 0.0493,
      "step": 476
    },
    {
      "epoch": 2.792784007801073,
      "grad_norm": 0.20946356654167175,
      "learning_rate": 1.2699749779249926e-07,
      "loss": 0.0491,
      "step": 477
    },
    {
      "epoch": 2.7986348122866893,
      "grad_norm": 0.21608950197696686,
      "learning_rate": 1.1944762924597286e-07,
      "loss": 0.0501,
      "step": 478
    },
    {
      "epoch": 2.8044856167723062,
      "grad_norm": 0.2118510901927948,
      "learning_rate": 1.1212639514257829e-07,
      "loss": 0.0491,
      "step": 479
    },
    {
      "epoch": 2.810336421257923,
      "grad_norm": 0.19921454787254333,
      "learning_rate": 1.0503413845297739e-07,
      "loss": 0.0446,
      "step": 480
    },
    {
      "epoch": 2.8161872257435396,
      "grad_norm": 0.2002425491809845,
      "learning_rate": 9.817119142115472e-08,
      "loss": 0.0469,
      "step": 481
    },
    {
      "epoch": 2.8220380302291566,
      "grad_norm": 0.18556718528270721,
      "learning_rate": 9.15378755488483e-08,
      "loss": 0.0542,
      "step": 482
    },
    {
      "epoch": 2.8278888347147735,
      "grad_norm": 0.2261078804731369,
      "learning_rate": 8.513450158049109e-08,
      "loss": 0.0513,
      "step": 483
    },
    {
      "epoch": 2.83373963920039,
      "grad_norm": 0.18545158207416534,
      "learning_rate": 7.896136948865429e-08,
      "loss": 0.0483,
      "step": 484
    },
    {
      "epoch": 2.839590443686007,
      "grad_norm": 0.21641848981380463,
      "learning_rate": 7.301876845999368e-08,
      "loss": 0.0504,
      "step": 485
    },
    {
      "epoch": 2.845441248171624,
      "grad_norm": 0.1922488510608673,
      "learning_rate": 6.730697688170251e-08,
      "loss": 0.0491,
      "step": 486
    },
    {
      "epoch": 2.8512920526572403,
      "grad_norm": 0.21676276624202728,
      "learning_rate": 6.182626232847044e-08,
      "loss": 0.045,
      "step": 487
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.20930063724517822,
      "learning_rate": 5.6576881549949e-08,
      "loss": 0.0455,
      "step": 488
    },
    {
      "epoch": 2.862993661628474,
      "grad_norm": 0.1886722296476364,
      "learning_rate": 5.155908045872349e-08,
      "loss": 0.0418,
      "step": 489
    },
    {
      "epoch": 2.8688444661140906,
      "grad_norm": 0.19664409756660461,
      "learning_rate": 4.677309411879327e-08,
      "loss": 0.0458,
      "step": 490
    },
    {
      "epoch": 2.8746952705997075,
      "grad_norm": 0.20216220617294312,
      "learning_rate": 4.221914673455896e-08,
      "loss": 0.0458,
      "step": 491
    },
    {
      "epoch": 2.8805460750853245,
      "grad_norm": 0.18322984874248505,
      "learning_rate": 3.7897451640321326e-08,
      "loss": 0.045,
      "step": 492
    },
    {
      "epoch": 2.886396879570941,
      "grad_norm": 0.21675418317317963,
      "learning_rate": 3.3808211290284886e-08,
      "loss": 0.0506,
      "step": 493
    },
    {
      "epoch": 2.892247684056558,
      "grad_norm": 0.21793396770954132,
      "learning_rate": 2.995161724907658e-08,
      "loss": 0.0449,
      "step": 494
    },
    {
      "epoch": 2.8980984885421748,
      "grad_norm": 0.2188396006822586,
      "learning_rate": 2.6327850182769065e-08,
      "loss": 0.055,
      "step": 495
    },
    {
      "epoch": 2.9039492930277913,
      "grad_norm": 0.2312510460615158,
      "learning_rate": 2.29370798504186e-08,
      "loss": 0.0449,
      "step": 496
    },
    {
      "epoch": 2.909800097513408,
      "grad_norm": 0.22981475293636322,
      "learning_rate": 1.9779465096112505e-08,
      "loss": 0.0516,
      "step": 497
    },
    {
      "epoch": 2.915650901999025,
      "grad_norm": 0.2100413292646408,
      "learning_rate": 1.6855153841527915e-08,
      "loss": 0.0425,
      "step": 498
    },
    {
      "epoch": 2.9215017064846416,
      "grad_norm": 0.21841102838516235,
      "learning_rate": 1.4164283079001196e-08,
      "loss": 0.0558,
      "step": 499
    },
    {
      "epoch": 2.9273525109702585,
      "grad_norm": 0.18299135565757751,
      "learning_rate": 1.1706978865113072e-08,
      "loss": 0.0416,
      "step": 500
    },
    {
      "epoch": 2.9332033154558754,
      "grad_norm": 0.19131526350975037,
      "learning_rate": 9.48335631477948e-09,
      "loss": 0.042,
      "step": 501
    },
    {
      "epoch": 2.939054119941492,
      "grad_norm": 0.1995655596256256,
      "learning_rate": 7.49351959586253e-09,
      "loss": 0.0621,
      "step": 502
    },
    {
      "epoch": 2.944904924427109,
      "grad_norm": 0.18701446056365967,
      "learning_rate": 5.737561924288315e-09,
      "loss": 0.0392,
      "step": 503
    },
    {
      "epoch": 2.9507557289127258,
      "grad_norm": 0.21277514100074768,
      "learning_rate": 4.2155655596809455e-09,
      "loss": 0.0484,
      "step": 504
    },
    {
      "epoch": 2.9566065333983422,
      "grad_norm": 0.19108496606349945,
      "learning_rate": 2.9276018015089725e-09,
      "loss": 0.0486,
      "step": 505
    },
    {
      "epoch": 2.962457337883959,
      "grad_norm": 0.2005542814731598,
      "learning_rate": 1.8737309857463916e-09,
      "loss": 0.0558,
      "step": 506
    },
    {
      "epoch": 2.968308142369576,
      "grad_norm": 0.1871800273656845,
      "learning_rate": 1.054002482043237e-09,
      "loss": 0.0544,
      "step": 507
    },
    {
      "epoch": 2.9741589468551926,
      "grad_norm": 0.21848636865615845,
      "learning_rate": 4.684546914163201e-10,
      "loss": 0.0513,
      "step": 508
    },
    {
      "epoch": 2.9800097513408095,
      "grad_norm": 0.1974973976612091,
      "learning_rate": 1.1711504444733567e-10,
      "loss": 0.0431,
      "step": 509
    },
    {
      "epoch": 2.9858605558264264,
      "grad_norm": 0.20914912223815918,
      "learning_rate": 0.0,
      "loss": 0.0556,
      "step": 510
    },
    {
      "epoch": 2.9858605558264264,
      "step": 510,
      "total_flos": 1495659198021632.0,
      "train_loss": 0.09055571391740266,
      "train_runtime": 153095.38,
      "train_samples_per_second": 0.321,
      "train_steps_per_second": 0.003
    }
  ],
  "logging_steps": 1,
  "max_steps": 510,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1495659198021632.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}