|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 0, |
|
"global_step": 217, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004608294930875576, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 9.953917050691245e-06, |
|
"loss": 1.7388, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009216589861751152, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 9.90783410138249e-06, |
|
"loss": 1.6388, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.013824884792626729, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 9.861751152073733e-06, |
|
"loss": 1.7512, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.018433179723502304, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 9.815668202764977e-06, |
|
"loss": 1.694, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02304147465437788, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 9.769585253456221e-06, |
|
"loss": 1.672, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.027649769585253458, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 9.723502304147466e-06, |
|
"loss": 1.643, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 9.67741935483871e-06, |
|
"loss": 1.6822, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03686635944700461, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 9.631336405529955e-06, |
|
"loss": 1.6898, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.041474654377880185, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 9.5852534562212e-06, |
|
"loss": 1.6326, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04608294930875576, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 9.539170506912442e-06, |
|
"loss": 1.649, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05069124423963134, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 9.493087557603687e-06, |
|
"loss": 1.5545, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.055299539170506916, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 9.447004608294931e-06, |
|
"loss": 1.5782, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.059907834101382486, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 9.400921658986176e-06, |
|
"loss": 1.52, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 9.35483870967742e-06, |
|
"loss": 1.4491, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06912442396313365, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 9.308755760368664e-06, |
|
"loss": 1.511, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07373271889400922, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 9.262672811059909e-06, |
|
"loss": 1.4852, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07834101382488479, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 9.216589861751153e-06, |
|
"loss": 1.5376, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08294930875576037, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 9.170506912442398e-06, |
|
"loss": 1.5211, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08755760368663594, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 9.124423963133642e-06, |
|
"loss": 1.4544, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.09216589861751152, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 9.078341013824885e-06, |
|
"loss": 1.5231, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0967741935483871, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 9.03225806451613e-06, |
|
"loss": 1.5041, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.10138248847926268, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 8.986175115207374e-06, |
|
"loss": 1.4218, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10599078341013825, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 8.940092165898619e-06, |
|
"loss": 1.4255, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11059907834101383, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 8.894009216589863e-06, |
|
"loss": 1.3793, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1152073732718894, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 8.847926267281107e-06, |
|
"loss": 1.3534, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11981566820276497, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 8.80184331797235e-06, |
|
"loss": 1.4523, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12442396313364056, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 8.755760368663595e-06, |
|
"loss": 1.4478, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 8.70967741935484e-06, |
|
"loss": 1.4346, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1336405529953917, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 8.663594470046084e-06, |
|
"loss": 1.4048, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1382488479262673, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 8.617511520737328e-06, |
|
"loss": 1.3774, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 1.3979, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.14746543778801843, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 8.525345622119815e-06, |
|
"loss": 1.3433, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15207373271889402, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.47926267281106e-06, |
|
"loss": 1.3798, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.15668202764976957, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 8.433179723502304e-06, |
|
"loss": 1.3775, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.387096774193549e-06, |
|
"loss": 1.3377, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.16589861751152074, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 8.341013824884793e-06, |
|
"loss": 1.3534, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.17050691244239632, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 8.294930875576038e-06, |
|
"loss": 1.3038, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.17511520737327188, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 8.248847926267282e-06, |
|
"loss": 1.3418, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.17972350230414746, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 8.202764976958527e-06, |
|
"loss": 1.3258, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.18433179723502305, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 8.156682027649771e-06, |
|
"loss": 1.3713, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1889400921658986, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.110599078341016e-06, |
|
"loss": 1.3138, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 1.3456, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.19815668202764977, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 8.018433179723503e-06, |
|
"loss": 1.306, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.20276497695852536, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 7.972350230414747e-06, |
|
"loss": 1.3187, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2073732718894009, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 7.926267281105992e-06, |
|
"loss": 1.2345, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2119815668202765, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 7.880184331797236e-06, |
|
"loss": 1.2895, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.21658986175115208, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 7.83410138248848e-06, |
|
"loss": 1.3031, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.22119815668202766, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 7.788018433179724e-06, |
|
"loss": 1.3293, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.22580645161290322, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 7.741935483870968e-06, |
|
"loss": 1.2938, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2304147465437788, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 7.695852534562212e-06, |
|
"loss": 1.2732, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2350230414746544, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 7.649769585253457e-06, |
|
"loss": 1.2642, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.23963133640552994, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 7.603686635944701e-06, |
|
"loss": 1.3026, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.24423963133640553, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 7.557603686635945e-06, |
|
"loss": 1.2532, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2488479262672811, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 7.5115207373271895e-06, |
|
"loss": 1.2107, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2534562211981567, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 7.465437788018434e-06, |
|
"loss": 1.2805, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 7.4193548387096784e-06, |
|
"loss": 1.3163, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2626728110599078, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 7.373271889400923e-06, |
|
"loss": 1.2855, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.2672811059907834, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 7.327188940092167e-06, |
|
"loss": 1.2611, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.271889400921659, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 7.28110599078341e-06, |
|
"loss": 1.2804, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2764976958525346, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 7.235023041474655e-06, |
|
"loss": 1.2066, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28110599078341014, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 7.188940092165899e-06, |
|
"loss": 1.2636, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 1.2248, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2903225806451613, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 7.096774193548388e-06, |
|
"loss": 1.2694, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.29493087557603687, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 7.050691244239632e-06, |
|
"loss": 1.2142, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2995391705069124, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 7.004608294930876e-06, |
|
"loss": 1.2363, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.30414746543778803, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 6.958525345622121e-06, |
|
"loss": 1.2021, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3087557603686636, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 6.912442396313365e-06, |
|
"loss": 1.2599, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.31336405529953915, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 6.866359447004609e-06, |
|
"loss": 1.2112, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.31797235023041476, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 6.820276497695853e-06, |
|
"loss": 1.2716, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 6.774193548387097e-06, |
|
"loss": 1.206, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3271889400921659, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 6.728110599078341e-06, |
|
"loss": 1.2148, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3317972350230415, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 6.682027649769586e-06, |
|
"loss": 1.2395, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.33640552995391704, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 6.63594470046083e-06, |
|
"loss": 1.2244, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.34101382488479265, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 6.589861751152075e-06, |
|
"loss": 1.2592, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3456221198156682, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 6.543778801843319e-06, |
|
"loss": 1.1787, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.35023041474654376, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 6.497695852534563e-06, |
|
"loss": 1.2108, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3548387096774194, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 6.451612903225806e-06, |
|
"loss": 1.2065, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.35944700460829493, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 6.405529953917051e-06, |
|
"loss": 1.1616, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3640552995391705, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 6.359447004608295e-06, |
|
"loss": 1.2286, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3686635944700461, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 6.31336405529954e-06, |
|
"loss": 1.2252, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37327188940092165, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 6.267281105990783e-06, |
|
"loss": 1.1997, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.3778801843317972, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 6.221198156682028e-06, |
|
"loss": 1.2137, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3824884792626728, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 6.175115207373272e-06, |
|
"loss": 1.21, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 6.129032258064517e-06, |
|
"loss": 1.2207, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.391705069124424, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 6.082949308755761e-06, |
|
"loss": 1.1927, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.39631336405529954, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 6.036866359447006e-06, |
|
"loss": 1.1553, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4009216589861751, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 5.9907834101382485e-06, |
|
"loss": 1.1945, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4055299539170507, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 5.944700460829493e-06, |
|
"loss": 1.2145, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.41013824884792627, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 5.8986175115207375e-06, |
|
"loss": 1.2411, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4147465437788018, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 5.852534562211982e-06, |
|
"loss": 1.1648, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41935483870967744, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 5.806451612903226e-06, |
|
"loss": 1.2028, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.423963133640553, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 5.76036866359447e-06, |
|
"loss": 1.1712, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.1534, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.43317972350230416, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 5.668202764976959e-06, |
|
"loss": 1.199, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4377880184331797, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 5.6221198156682035e-06, |
|
"loss": 1.1339, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4423963133640553, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 5.576036866359448e-06, |
|
"loss": 1.1097, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4470046082949309, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 5.529953917050692e-06, |
|
"loss": 1.191, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.45161290322580644, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 5.483870967741935e-06, |
|
"loss": 1.1458, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.45622119815668205, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 5.43778801843318e-06, |
|
"loss": 1.1599, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4608294930875576, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 5.391705069124424e-06, |
|
"loss": 1.2086, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.46543778801843316, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 5.345622119815669e-06, |
|
"loss": 1.1756, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4700460829493088, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 5.299539170506913e-06, |
|
"loss": 1.1644, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.47465437788018433, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 5.253456221198157e-06, |
|
"loss": 1.1534, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4792626728110599, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 5.207373271889401e-06, |
|
"loss": 1.1422, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 5.161290322580646e-06, |
|
"loss": 1.1176, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.48847926267281105, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 5.11520737327189e-06, |
|
"loss": 1.191, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4930875576036866, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 5.0691244239631346e-06, |
|
"loss": 1.1478, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4976958525345622, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 5.023041474654379e-06, |
|
"loss": 1.1111, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5023041474654378, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 4.976958525345623e-06, |
|
"loss": 1.1853, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5069124423963134, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 4.930875576036866e-06, |
|
"loss": 1.0536, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.511520737327189, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 4.884792626728111e-06, |
|
"loss": 1.2063, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 1.2118, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5207373271889401, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 4.7926267281106e-06, |
|
"loss": 1.1711, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5253456221198156, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 4.746543778801843e-06, |
|
"loss": 1.0994, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5299539170506913, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 4.700460829493088e-06, |
|
"loss": 1.2298, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5345622119815668, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 4.654377880184332e-06, |
|
"loss": 1.1533, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5391705069124424, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 4.608294930875577e-06, |
|
"loss": 1.1314, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.543778801843318, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.562211981566821e-06, |
|
"loss": 1.0946, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5483870967741935, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 4.516129032258065e-06, |
|
"loss": 1.1598, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5529953917050692, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 4.470046082949309e-06, |
|
"loss": 1.205, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5576036866359447, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 4.423963133640554e-06, |
|
"loss": 1.1472, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5622119815668203, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 4.377880184331797e-06, |
|
"loss": 1.161, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5668202764976958, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 4.331797235023042e-06, |
|
"loss": 1.1368, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 1.1498, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.576036866359447, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 4.23963133640553e-06, |
|
"loss": 1.1394, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5806451612903226, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 4.193548387096774e-06, |
|
"loss": 1.118, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5852534562211982, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 4.147465437788019e-06, |
|
"loss": 1.1132, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5898617511520737, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 4.101382488479263e-06, |
|
"loss": 1.1317, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5944700460829493, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 4.055299539170508e-06, |
|
"loss": 1.1931, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5990783410138248, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 4.0092165898617514e-06, |
|
"loss": 1.1573, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6036866359447005, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.963133640552996e-06, |
|
"loss": 1.1818, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6082949308755761, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 3.91705069124424e-06, |
|
"loss": 1.1848, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6129032258064516, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.870967741935484e-06, |
|
"loss": 1.2125, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6175115207373272, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 3.8248847926267285e-06, |
|
"loss": 1.0897, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6221198156682027, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.7788018433179725e-06, |
|
"loss": 1.1279, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6267281105990783, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 3.732718894009217e-06, |
|
"loss": 1.1931, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.631336405529954, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 3.6866359447004615e-06, |
|
"loss": 1.1233, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6359447004608295, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.640552995391705e-06, |
|
"loss": 1.1553, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6405529953917051, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.5944700460829495e-06, |
|
"loss": 1.1352, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 3.548387096774194e-06, |
|
"loss": 1.1494, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6497695852534562, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 3.502304147465438e-06, |
|
"loss": 1.1649, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6543778801843319, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.4562211981566825e-06, |
|
"loss": 1.1116, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6589861751152074, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 3.4101382488479266e-06, |
|
"loss": 1.1897, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.663594470046083, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.3640552995391706e-06, |
|
"loss": 1.1898, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6682027649769585, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.317972350230415e-06, |
|
"loss": 1.125, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6728110599078341, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.2718894009216596e-06, |
|
"loss": 1.133, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6774193548387096, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 3.225806451612903e-06, |
|
"loss": 1.1272, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6820276497695853, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 3.1797235023041477e-06, |
|
"loss": 1.1471, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6866359447004609, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.1336405529953917e-06, |
|
"loss": 1.1574, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6912442396313364, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 3.087557603686636e-06, |
|
"loss": 1.1739, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.695852534562212, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.0414746543778806e-06, |
|
"loss": 1.1835, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7004608294930875, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 2.9953917050691243e-06, |
|
"loss": 1.1064, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7050691244239631, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 2.9493087557603687e-06, |
|
"loss": 1.1405, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7096774193548387, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 2.903225806451613e-06, |
|
"loss": 1.1237, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 1.067, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7188940092165899, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 2.8110599078341017e-06, |
|
"loss": 1.0972, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7235023041474654, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 2.764976958525346e-06, |
|
"loss": 1.1545, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.728110599078341, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 2.71889400921659e-06, |
|
"loss": 1.1446, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7327188940092166, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 2.6728110599078343e-06, |
|
"loss": 1.1437, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7373271889400922, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 2.6267281105990783e-06, |
|
"loss": 1.0699, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7419354838709677, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 2.580645161290323e-06, |
|
"loss": 1.1981, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7465437788018433, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 2.5345622119815673e-06, |
|
"loss": 1.1747, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7511520737327189, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 2.4884792626728113e-06, |
|
"loss": 1.1183, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7557603686635944, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.4423963133640554e-06, |
|
"loss": 1.1191, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7603686635944701, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 2.3963133640553e-06, |
|
"loss": 1.1296, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7649769585253456, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 2.350230414746544e-06, |
|
"loss": 1.1151, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7695852534562212, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 2.3041474654377884e-06, |
|
"loss": 1.1226, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"loss": 1.1928, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7788018433179723, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 2.211981566820277e-06, |
|
"loss": 1.1284, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.783410138248848, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 2.165898617511521e-06, |
|
"loss": 1.1221, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7880184331797235, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 2.119815668202765e-06, |
|
"loss": 1.0839, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7926267281105991, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 2.0737327188940094e-06, |
|
"loss": 1.1434, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7972350230414746, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 2.027649769585254e-06, |
|
"loss": 1.0912, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.8018433179723502, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 1.981566820276498e-06, |
|
"loss": 1.1158, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 1.935483870967742e-06, |
|
"loss": 1.1269, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8110599078341014, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 1.8894009216589863e-06, |
|
"loss": 1.0894, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.815668202764977, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.8433179723502307e-06, |
|
"loss": 1.126, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8202764976958525, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 1.7972350230414748e-06, |
|
"loss": 1.1025, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8248847926267281, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 1.751152073732719e-06, |
|
"loss": 1.1413, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8294930875576036, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 1.7050691244239633e-06, |
|
"loss": 1.1502, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8341013824884793, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 1.6589861751152075e-06, |
|
"loss": 1.1985, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8387096774193549, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 1.1576, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8433179723502304, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 1.5668202764976959e-06, |
|
"loss": 1.1345, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.847926267281106, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 1.5207373271889403e-06, |
|
"loss": 1.0959, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8525345622119815, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 1.4746543778801844e-06, |
|
"loss": 1.0911, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 1.1769, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8617511520737328, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 1.382488479262673e-06, |
|
"loss": 1.1501, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8663594470046083, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 1.3364055299539171e-06, |
|
"loss": 1.1175, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8709677419354839, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 1.2903225806451614e-06, |
|
"loss": 1.1311, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8755760368663594, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 1.2442396313364057e-06, |
|
"loss": 1.1673, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.880184331797235, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 1.19815668202765e-06, |
|
"loss": 1.1911, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8847926267281107, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 1.1520737327188942e-06, |
|
"loss": 1.1032, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8894009216589862, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 1.1059907834101384e-06, |
|
"loss": 1.1286, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8940092165898618, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 1.0599078341013825e-06, |
|
"loss": 1.1544, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8986175115207373, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 1.013824884792627e-06, |
|
"loss": 1.1807, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.9032258064516129, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 9.67741935483871e-07, |
|
"loss": 1.1156, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9078341013824884, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.216589861751154e-07, |
|
"loss": 1.1991, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9124423963133641, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 8.755760368663595e-07, |
|
"loss": 1.156, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9170506912442397, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 8.294930875576038e-07, |
|
"loss": 1.2011, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.9216589861751152, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 7.834101382488479e-07, |
|
"loss": 1.1454, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9262672811059908, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 7.373271889400922e-07, |
|
"loss": 1.1033, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.9308755760368663, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 6.912442396313365e-07, |
|
"loss": 1.1421, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9354838709677419, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 6.451612903225807e-07, |
|
"loss": 1.1448, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9400921658986175, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 5.99078341013825e-07, |
|
"loss": 1.0805, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9447004608294931, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 5.529953917050692e-07, |
|
"loss": 1.0988, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.9493087557603687, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 5.069124423963135e-07, |
|
"loss": 1.1227, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.9539170506912442, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 4.608294930875577e-07, |
|
"loss": 1.1434, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9585253456221198, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 4.147465437788019e-07, |
|
"loss": 1.1174, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9631336405529954, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.686635944700461e-07, |
|
"loss": 1.1378, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.2258064516129035e-07, |
|
"loss": 1.1046, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9723502304147466, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 2.764976958525346e-07, |
|
"loss": 1.1156, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9769585253456221, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 2.3041474654377884e-07, |
|
"loss": 1.0785, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9815668202764977, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 1.8433179723502305e-07, |
|
"loss": 1.1603, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9861751152073732, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 1.382488479262673e-07, |
|
"loss": 1.1627, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9907834101382489, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 9.216589861751152e-08, |
|
"loss": 1.1749, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9953917050691244, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 4.608294930875576e-08, |
|
"loss": 1.1352, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 0.0, |
|
"loss": 1.176, |
|
"step": 217 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 217, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.833535671169188e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|