{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 217, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004608294930875576, "grad_norm": 0.4140625, "learning_rate": 9.953917050691245e-06, "loss": 1.7388, "step": 1 }, { "epoch": 0.009216589861751152, "grad_norm": 0.412109375, "learning_rate": 9.90783410138249e-06, "loss": 1.6388, "step": 2 }, { "epoch": 0.013824884792626729, "grad_norm": 0.439453125, "learning_rate": 9.861751152073733e-06, "loss": 1.7512, "step": 3 }, { "epoch": 0.018433179723502304, "grad_norm": 0.412109375, "learning_rate": 9.815668202764977e-06, "loss": 1.694, "step": 4 }, { "epoch": 0.02304147465437788, "grad_norm": 0.390625, "learning_rate": 9.769585253456221e-06, "loss": 1.672, "step": 5 }, { "epoch": 0.027649769585253458, "grad_norm": 0.390625, "learning_rate": 9.723502304147466e-06, "loss": 1.643, "step": 6 }, { "epoch": 0.03225806451612903, "grad_norm": 0.396484375, "learning_rate": 9.67741935483871e-06, "loss": 1.6822, "step": 7 }, { "epoch": 0.03686635944700461, "grad_norm": 0.37109375, "learning_rate": 9.631336405529955e-06, "loss": 1.6898, "step": 8 }, { "epoch": 0.041474654377880185, "grad_norm": 0.357421875, "learning_rate": 9.5852534562212e-06, "loss": 1.6326, "step": 9 }, { "epoch": 0.04608294930875576, "grad_norm": 0.55078125, "learning_rate": 9.539170506912442e-06, "loss": 1.649, "step": 10 }, { "epoch": 0.05069124423963134, "grad_norm": 0.388671875, "learning_rate": 9.493087557603687e-06, "loss": 1.5545, "step": 11 }, { "epoch": 0.055299539170506916, "grad_norm": 0.314453125, "learning_rate": 9.447004608294931e-06, "loss": 1.5782, "step": 12 }, { "epoch": 0.059907834101382486, "grad_norm": 0.29296875, "learning_rate": 9.400921658986176e-06, "loss": 1.52, "step": 13 }, { "epoch": 0.06451612903225806, "grad_norm": 0.2890625, "learning_rate": 9.35483870967742e-06, "loss": 1.4491, "step": 14 }, { "epoch": 0.06912442396313365, "grad_norm": 0.306640625, "learning_rate": 9.308755760368664e-06, "loss": 1.511, "step": 15 }, { "epoch": 0.07373271889400922, "grad_norm": 0.294921875, "learning_rate": 9.262672811059909e-06, "loss": 1.4852, "step": 16 }, { "epoch": 0.07834101382488479, "grad_norm": 0.3125, "learning_rate": 9.216589861751153e-06, "loss": 1.5376, "step": 17 }, { "epoch": 0.08294930875576037, "grad_norm": 0.294921875, "learning_rate": 9.170506912442398e-06, "loss": 1.5211, "step": 18 }, { "epoch": 0.08755760368663594, "grad_norm": 0.287109375, "learning_rate": 9.124423963133642e-06, "loss": 1.4544, "step": 19 }, { "epoch": 0.09216589861751152, "grad_norm": 0.30859375, "learning_rate": 9.078341013824885e-06, "loss": 1.5231, "step": 20 }, { "epoch": 0.0967741935483871, "grad_norm": 0.30078125, "learning_rate": 9.03225806451613e-06, "loss": 1.5041, "step": 21 }, { "epoch": 0.10138248847926268, "grad_norm": 0.2470703125, "learning_rate": 8.986175115207374e-06, "loss": 1.4218, "step": 22 }, { "epoch": 0.10599078341013825, "grad_norm": 0.25390625, "learning_rate": 8.940092165898619e-06, "loss": 1.4255, "step": 23 }, { "epoch": 0.11059907834101383, "grad_norm": 0.22265625, "learning_rate": 8.894009216589863e-06, "loss": 1.3793, "step": 24 }, { "epoch": 0.1152073732718894, "grad_norm": 0.2255859375, "learning_rate": 8.847926267281107e-06, "loss": 1.3534, "step": 25 }, { "epoch": 0.11981566820276497, "grad_norm": 0.22265625, "learning_rate": 8.80184331797235e-06, "loss": 1.4523, "step": 26 }, { "epoch": 0.12442396313364056, "grad_norm": 0.2119140625, "learning_rate": 8.755760368663595e-06, "loss": 1.4478, "step": 27 }, { "epoch": 0.12903225806451613, "grad_norm": 0.2216796875, "learning_rate": 8.70967741935484e-06, "loss": 1.4346, "step": 28 }, { "epoch": 0.1336405529953917, "grad_norm": 0.2158203125, "learning_rate": 8.663594470046084e-06, "loss": 1.4048, "step": 29 }, { "epoch": 0.1382488479262673, "grad_norm": 0.19921875, "learning_rate": 8.617511520737328e-06, "loss": 1.3774, "step": 30 }, { "epoch": 0.14285714285714285, "grad_norm": 0.220703125, "learning_rate": 8.571428571428571e-06, "loss": 1.3979, "step": 31 }, { "epoch": 0.14746543778801843, "grad_norm": 0.189453125, "learning_rate": 8.525345622119815e-06, "loss": 1.3433, "step": 32 }, { "epoch": 0.15207373271889402, "grad_norm": 0.1953125, "learning_rate": 8.47926267281106e-06, "loss": 1.3798, "step": 33 }, { "epoch": 0.15668202764976957, "grad_norm": 0.1884765625, "learning_rate": 8.433179723502304e-06, "loss": 1.3775, "step": 34 }, { "epoch": 0.16129032258064516, "grad_norm": 0.193359375, "learning_rate": 8.387096774193549e-06, "loss": 1.3377, "step": 35 }, { "epoch": 0.16589861751152074, "grad_norm": 0.212890625, "learning_rate": 8.341013824884793e-06, "loss": 1.3534, "step": 36 }, { "epoch": 0.17050691244239632, "grad_norm": 0.232421875, "learning_rate": 8.294930875576038e-06, "loss": 1.3038, "step": 37 }, { "epoch": 0.17511520737327188, "grad_norm": 0.16796875, "learning_rate": 8.248847926267282e-06, "loss": 1.3418, "step": 38 }, { "epoch": 0.17972350230414746, "grad_norm": 0.181640625, "learning_rate": 8.202764976958527e-06, "loss": 1.3258, "step": 39 }, { "epoch": 0.18433179723502305, "grad_norm": 0.1962890625, "learning_rate": 8.156682027649771e-06, "loss": 1.3713, "step": 40 }, { "epoch": 0.1889400921658986, "grad_norm": 0.1845703125, "learning_rate": 8.110599078341016e-06, "loss": 1.3138, "step": 41 }, { "epoch": 0.1935483870967742, "grad_norm": 0.1962890625, "learning_rate": 8.064516129032258e-06, "loss": 1.3456, "step": 42 }, { "epoch": 0.19815668202764977, "grad_norm": 0.1787109375, "learning_rate": 8.018433179723503e-06, "loss": 1.306, "step": 43 }, { "epoch": 0.20276497695852536, "grad_norm": 0.1708984375, "learning_rate": 7.972350230414747e-06, "loss": 1.3187, "step": 44 }, { "epoch": 0.2073732718894009, "grad_norm": 0.1533203125, "learning_rate": 7.926267281105992e-06, "loss": 1.2345, "step": 45 }, { "epoch": 0.2119815668202765, "grad_norm": 0.1728515625, "learning_rate": 7.880184331797236e-06, "loss": 1.2895, "step": 46 }, { "epoch": 0.21658986175115208, "grad_norm": 0.1708984375, "learning_rate": 7.83410138248848e-06, "loss": 1.3031, "step": 47 }, { "epoch": 0.22119815668202766, "grad_norm": 0.1689453125, "learning_rate": 7.788018433179724e-06, "loss": 1.3293, "step": 48 }, { "epoch": 0.22580645161290322, "grad_norm": 0.1865234375, "learning_rate": 7.741935483870968e-06, "loss": 1.2938, "step": 49 }, { "epoch": 0.2304147465437788, "grad_norm": 0.2373046875, "learning_rate": 7.695852534562212e-06, "loss": 1.2732, "step": 50 }, { "epoch": 0.2350230414746544, "grad_norm": 0.181640625, "learning_rate": 7.649769585253457e-06, "loss": 1.2642, "step": 51 }, { "epoch": 0.23963133640552994, "grad_norm": 0.154296875, "learning_rate": 7.603686635944701e-06, "loss": 1.3026, "step": 52 }, { "epoch": 0.24423963133640553, "grad_norm": 0.1591796875, "learning_rate": 7.557603686635945e-06, "loss": 1.2532, "step": 53 }, { "epoch": 0.2488479262672811, "grad_norm": 0.1494140625, "learning_rate": 7.5115207373271895e-06, "loss": 1.2107, "step": 54 }, { "epoch": 0.2534562211981567, "grad_norm": 0.158203125, "learning_rate": 7.465437788018434e-06, "loss": 1.2805, "step": 55 }, { "epoch": 0.25806451612903225, "grad_norm": 0.1572265625, "learning_rate": 7.4193548387096784e-06, "loss": 1.3163, "step": 56 }, { "epoch": 0.2626728110599078, "grad_norm": 0.1689453125, "learning_rate": 7.373271889400923e-06, "loss": 1.2855, "step": 57 }, { "epoch": 0.2672811059907834, "grad_norm": 0.189453125, "learning_rate": 7.327188940092167e-06, "loss": 1.2611, "step": 58 }, { "epoch": 0.271889400921659, "grad_norm": 0.1591796875, "learning_rate": 7.28110599078341e-06, "loss": 1.2804, "step": 59 }, { "epoch": 0.2764976958525346, "grad_norm": 0.1689453125, "learning_rate": 7.235023041474655e-06, "loss": 1.2066, "step": 60 }, { "epoch": 0.28110599078341014, "grad_norm": 0.1748046875, "learning_rate": 7.188940092165899e-06, "loss": 1.2636, "step": 61 }, { "epoch": 0.2857142857142857, "grad_norm": 0.1513671875, "learning_rate": 7.1428571428571436e-06, "loss": 1.2248, "step": 62 }, { "epoch": 0.2903225806451613, "grad_norm": 0.19921875, "learning_rate": 7.096774193548388e-06, "loss": 1.2694, "step": 63 }, { "epoch": 0.29493087557603687, "grad_norm": 0.1533203125, "learning_rate": 7.050691244239632e-06, "loss": 1.2142, "step": 64 }, { "epoch": 0.2995391705069124, "grad_norm": 0.1513671875, "learning_rate": 7.004608294930876e-06, "loss": 1.2363, "step": 65 }, { "epoch": 0.30414746543778803, "grad_norm": 0.15234375, "learning_rate": 6.958525345622121e-06, "loss": 1.2021, "step": 66 }, { "epoch": 0.3087557603686636, "grad_norm": 0.1572265625, "learning_rate": 6.912442396313365e-06, "loss": 1.2599, "step": 67 }, { "epoch": 0.31336405529953915, "grad_norm": 0.240234375, "learning_rate": 6.866359447004609e-06, "loss": 1.2112, "step": 68 }, { "epoch": 0.31797235023041476, "grad_norm": 0.1533203125, "learning_rate": 6.820276497695853e-06, "loss": 1.2716, "step": 69 }, { "epoch": 0.3225806451612903, "grad_norm": 0.15625, "learning_rate": 6.774193548387097e-06, "loss": 1.206, "step": 70 }, { "epoch": 0.3271889400921659, "grad_norm": 0.15625, "learning_rate": 6.728110599078341e-06, "loss": 1.2148, "step": 71 }, { "epoch": 0.3317972350230415, "grad_norm": 0.1494140625, "learning_rate": 6.682027649769586e-06, "loss": 1.2395, "step": 72 }, { "epoch": 0.33640552995391704, "grad_norm": 0.1513671875, "learning_rate": 6.63594470046083e-06, "loss": 1.2244, "step": 73 }, { "epoch": 0.34101382488479265, "grad_norm": 0.1552734375, "learning_rate": 6.589861751152075e-06, "loss": 1.2592, "step": 74 }, { "epoch": 0.3456221198156682, "grad_norm": 0.1513671875, "learning_rate": 6.543778801843319e-06, "loss": 1.1787, "step": 75 }, { "epoch": 0.35023041474654376, "grad_norm": 0.15234375, "learning_rate": 6.497695852534563e-06, "loss": 1.2108, "step": 76 }, { "epoch": 0.3548387096774194, "grad_norm": 0.158203125, "learning_rate": 6.451612903225806e-06, "loss": 1.2065, "step": 77 }, { "epoch": 0.35944700460829493, "grad_norm": 0.1494140625, "learning_rate": 6.405529953917051e-06, "loss": 1.1616, "step": 78 }, { "epoch": 0.3640552995391705, "grad_norm": 0.19140625, "learning_rate": 6.359447004608295e-06, "loss": 1.2286, "step": 79 }, { "epoch": 0.3686635944700461, "grad_norm": 0.1494140625, "learning_rate": 6.31336405529954e-06, "loss": 1.2252, "step": 80 }, { "epoch": 0.37327188940092165, "grad_norm": 0.158203125, "learning_rate": 6.267281105990783e-06, "loss": 1.1997, "step": 81 }, { "epoch": 0.3778801843317972, "grad_norm": 0.15234375, "learning_rate": 6.221198156682028e-06, "loss": 1.2137, "step": 82 }, { "epoch": 0.3824884792626728, "grad_norm": 0.1416015625, "learning_rate": 6.175115207373272e-06, "loss": 1.21, "step": 83 }, { "epoch": 0.3870967741935484, "grad_norm": 0.150390625, "learning_rate": 6.129032258064517e-06, "loss": 1.2207, "step": 84 }, { "epoch": 0.391705069124424, "grad_norm": 0.1455078125, "learning_rate": 6.082949308755761e-06, "loss": 1.1927, "step": 85 }, { "epoch": 0.39631336405529954, "grad_norm": 0.2109375, "learning_rate": 6.036866359447006e-06, "loss": 1.1553, "step": 86 }, { "epoch": 0.4009216589861751, "grad_norm": 0.1689453125, "learning_rate": 5.9907834101382485e-06, "loss": 1.1945, "step": 87 }, { "epoch": 0.4055299539170507, "grad_norm": 0.158203125, "learning_rate": 5.944700460829493e-06, "loss": 1.2145, "step": 88 }, { "epoch": 0.41013824884792627, "grad_norm": 0.16015625, "learning_rate": 5.8986175115207375e-06, "loss": 1.2411, "step": 89 }, { "epoch": 0.4147465437788018, "grad_norm": 0.150390625, "learning_rate": 5.852534562211982e-06, "loss": 1.1648, "step": 90 }, { "epoch": 0.41935483870967744, "grad_norm": 0.140625, "learning_rate": 5.806451612903226e-06, "loss": 1.2028, "step": 91 }, { "epoch": 0.423963133640553, "grad_norm": 0.1455078125, "learning_rate": 5.76036866359447e-06, "loss": 1.1712, "step": 92 }, { "epoch": 0.42857142857142855, "grad_norm": 0.150390625, "learning_rate": 5.7142857142857145e-06, "loss": 1.1534, "step": 93 }, { "epoch": 0.43317972350230416, "grad_norm": 0.2021484375, "learning_rate": 5.668202764976959e-06, "loss": 1.199, "step": 94 }, { "epoch": 0.4377880184331797, "grad_norm": 0.13671875, "learning_rate": 5.6221198156682035e-06, "loss": 1.1339, "step": 95 }, { "epoch": 0.4423963133640553, "grad_norm": 0.1455078125, "learning_rate": 5.576036866359448e-06, "loss": 1.1097, "step": 96 }, { "epoch": 0.4470046082949309, "grad_norm": 0.154296875, "learning_rate": 5.529953917050692e-06, "loss": 1.191, "step": 97 }, { "epoch": 0.45161290322580644, "grad_norm": 0.1376953125, "learning_rate": 5.483870967741935e-06, "loss": 1.1458, "step": 98 }, { "epoch": 0.45622119815668205, "grad_norm": 0.1494140625, "learning_rate": 5.43778801843318e-06, "loss": 1.1599, "step": 99 }, { "epoch": 0.4608294930875576, "grad_norm": 0.146484375, "learning_rate": 5.391705069124424e-06, "loss": 1.2086, "step": 100 }, { "epoch": 0.46543778801843316, "grad_norm": 0.1474609375, "learning_rate": 5.345622119815669e-06, "loss": 1.1756, "step": 101 }, { "epoch": 0.4700460829493088, "grad_norm": 0.16015625, "learning_rate": 5.299539170506913e-06, "loss": 1.1644, "step": 102 }, { "epoch": 0.47465437788018433, "grad_norm": 0.1474609375, "learning_rate": 5.253456221198157e-06, "loss": 1.1534, "step": 103 }, { "epoch": 0.4792626728110599, "grad_norm": 0.146484375, "learning_rate": 5.207373271889401e-06, "loss": 1.1422, "step": 104 }, { "epoch": 0.4838709677419355, "grad_norm": 0.34375, "learning_rate": 5.161290322580646e-06, "loss": 1.1176, "step": 105 }, { "epoch": 0.48847926267281105, "grad_norm": 0.146484375, "learning_rate": 5.11520737327189e-06, "loss": 1.191, "step": 106 }, { "epoch": 0.4930875576036866, "grad_norm": 0.13671875, "learning_rate": 5.0691244239631346e-06, "loss": 1.1478, "step": 107 }, { "epoch": 0.4976958525345622, "grad_norm": 0.142578125, "learning_rate": 5.023041474654379e-06, "loss": 1.1111, "step": 108 }, { "epoch": 0.5023041474654378, "grad_norm": 0.154296875, "learning_rate": 4.976958525345623e-06, "loss": 1.1853, "step": 109 }, { "epoch": 0.5069124423963134, "grad_norm": 0.13671875, "learning_rate": 4.930875576036866e-06, "loss": 1.0536, "step": 110 }, { "epoch": 0.511520737327189, "grad_norm": 0.1552734375, "learning_rate": 4.884792626728111e-06, "loss": 1.2063, "step": 111 }, { "epoch": 0.5161290322580645, "grad_norm": 0.1552734375, "learning_rate": 4.838709677419355e-06, "loss": 1.2118, "step": 112 }, { "epoch": 0.5207373271889401, "grad_norm": 0.13671875, "learning_rate": 4.7926267281106e-06, "loss": 1.1711, "step": 113 }, { "epoch": 0.5253456221198156, "grad_norm": 0.1474609375, "learning_rate": 4.746543778801843e-06, "loss": 1.0994, "step": 114 }, { "epoch": 0.5299539170506913, "grad_norm": 0.1787109375, "learning_rate": 4.700460829493088e-06, "loss": 1.2298, "step": 115 }, { "epoch": 0.5345622119815668, "grad_norm": 0.15625, "learning_rate": 4.654377880184332e-06, "loss": 1.1533, "step": 116 }, { "epoch": 0.5391705069124424, "grad_norm": 0.1396484375, "learning_rate": 4.608294930875577e-06, "loss": 1.1314, "step": 117 }, { "epoch": 0.543778801843318, "grad_norm": 0.1328125, "learning_rate": 4.562211981566821e-06, "loss": 1.0946, "step": 118 }, { "epoch": 0.5483870967741935, "grad_norm": 0.150390625, "learning_rate": 4.516129032258065e-06, "loss": 1.1598, "step": 119 }, { "epoch": 0.5529953917050692, "grad_norm": 0.15625, "learning_rate": 4.470046082949309e-06, "loss": 1.205, "step": 120 }, { "epoch": 0.5576036866359447, "grad_norm": 0.158203125, "learning_rate": 4.423963133640554e-06, "loss": 1.1472, "step": 121 }, { "epoch": 0.5622119815668203, "grad_norm": 0.140625, "learning_rate": 4.377880184331797e-06, "loss": 1.161, "step": 122 }, { "epoch": 0.5668202764976958, "grad_norm": 0.1455078125, "learning_rate": 4.331797235023042e-06, "loss": 1.1368, "step": 123 }, { "epoch": 0.5714285714285714, "grad_norm": 0.181640625, "learning_rate": 4.2857142857142855e-06, "loss": 1.1498, "step": 124 }, { "epoch": 0.576036866359447, "grad_norm": 0.1533203125, "learning_rate": 4.23963133640553e-06, "loss": 1.1394, "step": 125 }, { "epoch": 0.5806451612903226, "grad_norm": 0.189453125, "learning_rate": 4.193548387096774e-06, "loss": 1.118, "step": 126 }, { "epoch": 0.5852534562211982, "grad_norm": 0.140625, "learning_rate": 4.147465437788019e-06, "loss": 1.1132, "step": 127 }, { "epoch": 0.5898617511520737, "grad_norm": 0.1416015625, "learning_rate": 4.101382488479263e-06, "loss": 1.1317, "step": 128 }, { "epoch": 0.5944700460829493, "grad_norm": 0.162109375, "learning_rate": 4.055299539170508e-06, "loss": 1.1931, "step": 129 }, { "epoch": 0.5990783410138248, "grad_norm": 0.15234375, "learning_rate": 4.0092165898617514e-06, "loss": 1.1573, "step": 130 }, { "epoch": 0.6036866359447005, "grad_norm": 0.15234375, "learning_rate": 3.963133640552996e-06, "loss": 1.1818, "step": 131 }, { "epoch": 0.6082949308755761, "grad_norm": 0.171875, "learning_rate": 3.91705069124424e-06, "loss": 1.1848, "step": 132 }, { "epoch": 0.6129032258064516, "grad_norm": 0.1484375, "learning_rate": 3.870967741935484e-06, "loss": 1.2125, "step": 133 }, { "epoch": 0.6175115207373272, "grad_norm": 0.1455078125, "learning_rate": 3.8248847926267285e-06, "loss": 1.0897, "step": 134 }, { "epoch": 0.6221198156682027, "grad_norm": 0.1494140625, "learning_rate": 3.7788018433179725e-06, "loss": 1.1279, "step": 135 }, { "epoch": 0.6267281105990783, "grad_norm": 0.169921875, "learning_rate": 3.732718894009217e-06, "loss": 1.1931, "step": 136 }, { "epoch": 0.631336405529954, "grad_norm": 0.1806640625, "learning_rate": 3.6866359447004615e-06, "loss": 1.1233, "step": 137 }, { "epoch": 0.6359447004608295, "grad_norm": 0.1650390625, "learning_rate": 3.640552995391705e-06, "loss": 1.1553, "step": 138 }, { "epoch": 0.6405529953917051, "grad_norm": 0.15234375, "learning_rate": 3.5944700460829495e-06, "loss": 1.1352, "step": 139 }, { "epoch": 0.6451612903225806, "grad_norm": 0.162109375, "learning_rate": 3.548387096774194e-06, "loss": 1.1494, "step": 140 }, { "epoch": 0.6497695852534562, "grad_norm": 0.1728515625, "learning_rate": 3.502304147465438e-06, "loss": 1.1649, "step": 141 }, { "epoch": 0.6543778801843319, "grad_norm": 0.1689453125, "learning_rate": 3.4562211981566825e-06, "loss": 1.1116, "step": 142 }, { "epoch": 0.6589861751152074, "grad_norm": 0.1640625, "learning_rate": 3.4101382488479266e-06, "loss": 1.1897, "step": 143 }, { "epoch": 0.663594470046083, "grad_norm": 0.1591796875, "learning_rate": 3.3640552995391706e-06, "loss": 1.1898, "step": 144 }, { "epoch": 0.6682027649769585, "grad_norm": 0.146484375, "learning_rate": 3.317972350230415e-06, "loss": 1.125, "step": 145 }, { "epoch": 0.6728110599078341, "grad_norm": 0.1591796875, "learning_rate": 3.2718894009216596e-06, "loss": 1.133, "step": 146 }, { "epoch": 0.6774193548387096, "grad_norm": 0.16015625, "learning_rate": 3.225806451612903e-06, "loss": 1.1272, "step": 147 }, { "epoch": 0.6820276497695853, "grad_norm": 0.1396484375, "learning_rate": 3.1797235023041477e-06, "loss": 1.1471, "step": 148 }, { "epoch": 0.6866359447004609, "grad_norm": 0.1513671875, "learning_rate": 3.1336405529953917e-06, "loss": 1.1574, "step": 149 }, { "epoch": 0.6912442396313364, "grad_norm": 0.158203125, "learning_rate": 3.087557603686636e-06, "loss": 1.1739, "step": 150 }, { "epoch": 0.695852534562212, "grad_norm": 0.1552734375, "learning_rate": 3.0414746543778806e-06, "loss": 1.1835, "step": 151 }, { "epoch": 0.7004608294930875, "grad_norm": 0.1572265625, "learning_rate": 2.9953917050691243e-06, "loss": 1.1064, "step": 152 }, { "epoch": 0.7050691244239631, "grad_norm": 0.1474609375, "learning_rate": 2.9493087557603687e-06, "loss": 1.1405, "step": 153 }, { "epoch": 0.7096774193548387, "grad_norm": 0.1650390625, "learning_rate": 2.903225806451613e-06, "loss": 1.1237, "step": 154 }, { "epoch": 0.7142857142857143, "grad_norm": 0.150390625, "learning_rate": 2.8571428571428573e-06, "loss": 1.067, "step": 155 }, { "epoch": 0.7188940092165899, "grad_norm": 0.16015625, "learning_rate": 2.8110599078341017e-06, "loss": 1.0972, "step": 156 }, { "epoch": 0.7235023041474654, "grad_norm": 0.1474609375, "learning_rate": 2.764976958525346e-06, "loss": 1.1545, "step": 157 }, { "epoch": 0.728110599078341, "grad_norm": 0.1494140625, "learning_rate": 2.71889400921659e-06, "loss": 1.1446, "step": 158 }, { "epoch": 0.7327188940092166, "grad_norm": 0.1455078125, "learning_rate": 2.6728110599078343e-06, "loss": 1.1437, "step": 159 }, { "epoch": 0.7373271889400922, "grad_norm": 0.1396484375, "learning_rate": 2.6267281105990783e-06, "loss": 1.0699, "step": 160 }, { "epoch": 0.7419354838709677, "grad_norm": 0.166015625, "learning_rate": 2.580645161290323e-06, "loss": 1.1981, "step": 161 }, { "epoch": 0.7465437788018433, "grad_norm": 0.150390625, "learning_rate": 2.5345622119815673e-06, "loss": 1.1747, "step": 162 }, { "epoch": 0.7511520737327189, "grad_norm": 0.154296875, "learning_rate": 2.4884792626728113e-06, "loss": 1.1183, "step": 163 }, { "epoch": 0.7557603686635944, "grad_norm": 0.1826171875, "learning_rate": 2.4423963133640554e-06, "loss": 1.1191, "step": 164 }, { "epoch": 0.7603686635944701, "grad_norm": 0.1630859375, "learning_rate": 2.3963133640553e-06, "loss": 1.1296, "step": 165 }, { "epoch": 0.7649769585253456, "grad_norm": 0.1572265625, "learning_rate": 2.350230414746544e-06, "loss": 1.1151, "step": 166 }, { "epoch": 0.7695852534562212, "grad_norm": 0.1435546875, "learning_rate": 2.3041474654377884e-06, "loss": 1.1226, "step": 167 }, { "epoch": 0.7741935483870968, "grad_norm": 0.1474609375, "learning_rate": 2.2580645161290324e-06, "loss": 1.1928, "step": 168 }, { "epoch": 0.7788018433179723, "grad_norm": 0.15625, "learning_rate": 2.211981566820277e-06, "loss": 1.1284, "step": 169 }, { "epoch": 0.783410138248848, "grad_norm": 0.15625, "learning_rate": 2.165898617511521e-06, "loss": 1.1221, "step": 170 }, { "epoch": 0.7880184331797235, "grad_norm": 0.1591796875, "learning_rate": 2.119815668202765e-06, "loss": 1.0839, "step": 171 }, { "epoch": 0.7926267281105991, "grad_norm": 0.162109375, "learning_rate": 2.0737327188940094e-06, "loss": 1.1434, "step": 172 }, { "epoch": 0.7972350230414746, "grad_norm": 0.1591796875, "learning_rate": 2.027649769585254e-06, "loss": 1.0912, "step": 173 }, { "epoch": 0.8018433179723502, "grad_norm": 0.16015625, "learning_rate": 1.981566820276498e-06, "loss": 1.1158, "step": 174 }, { "epoch": 0.8064516129032258, "grad_norm": 0.1650390625, "learning_rate": 1.935483870967742e-06, "loss": 1.1269, "step": 175 }, { "epoch": 0.8110599078341014, "grad_norm": 0.140625, "learning_rate": 1.8894009216589863e-06, "loss": 1.0894, "step": 176 }, { "epoch": 0.815668202764977, "grad_norm": 0.197265625, "learning_rate": 1.8433179723502307e-06, "loss": 1.126, "step": 177 }, { "epoch": 0.8202764976958525, "grad_norm": 0.15234375, "learning_rate": 1.7972350230414748e-06, "loss": 1.1025, "step": 178 }, { "epoch": 0.8248847926267281, "grad_norm": 0.1650390625, "learning_rate": 1.751152073732719e-06, "loss": 1.1413, "step": 179 }, { "epoch": 0.8294930875576036, "grad_norm": 0.1796875, "learning_rate": 1.7050691244239633e-06, "loss": 1.1502, "step": 180 }, { "epoch": 0.8341013824884793, "grad_norm": 0.1611328125, "learning_rate": 1.6589861751152075e-06, "loss": 1.1985, "step": 181 }, { "epoch": 0.8387096774193549, "grad_norm": 0.1630859375, "learning_rate": 1.6129032258064516e-06, "loss": 1.1576, "step": 182 }, { "epoch": 0.8433179723502304, "grad_norm": 0.146484375, "learning_rate": 1.5668202764976959e-06, "loss": 1.1345, "step": 183 }, { "epoch": 0.847926267281106, "grad_norm": 0.1455078125, "learning_rate": 1.5207373271889403e-06, "loss": 1.0959, "step": 184 }, { "epoch": 0.8525345622119815, "grad_norm": 0.1630859375, "learning_rate": 1.4746543778801844e-06, "loss": 1.0911, "step": 185 }, { "epoch": 0.8571428571428571, "grad_norm": 0.16015625, "learning_rate": 1.4285714285714286e-06, "loss": 1.1769, "step": 186 }, { "epoch": 0.8617511520737328, "grad_norm": 0.154296875, "learning_rate": 1.382488479262673e-06, "loss": 1.1501, "step": 187 }, { "epoch": 0.8663594470046083, "grad_norm": 0.166015625, "learning_rate": 1.3364055299539171e-06, "loss": 1.1175, "step": 188 }, { "epoch": 0.8709677419354839, "grad_norm": 0.14453125, "learning_rate": 1.2903225806451614e-06, "loss": 1.1311, "step": 189 }, { "epoch": 0.8755760368663594, "grad_norm": 0.1689453125, "learning_rate": 1.2442396313364057e-06, "loss": 1.1673, "step": 190 }, { "epoch": 0.880184331797235, "grad_norm": 0.154296875, "learning_rate": 1.19815668202765e-06, "loss": 1.1911, "step": 191 }, { "epoch": 0.8847926267281107, "grad_norm": 0.1484375, "learning_rate": 1.1520737327188942e-06, "loss": 1.1032, "step": 192 }, { "epoch": 0.8894009216589862, "grad_norm": 0.1748046875, "learning_rate": 1.1059907834101384e-06, "loss": 1.1286, "step": 193 }, { "epoch": 0.8940092165898618, "grad_norm": 0.1572265625, "learning_rate": 1.0599078341013825e-06, "loss": 1.1544, "step": 194 }, { "epoch": 0.8986175115207373, "grad_norm": 0.1572265625, "learning_rate": 1.013824884792627e-06, "loss": 1.1807, "step": 195 }, { "epoch": 0.9032258064516129, "grad_norm": 0.1875, "learning_rate": 9.67741935483871e-07, "loss": 1.1156, "step": 196 }, { "epoch": 0.9078341013824884, "grad_norm": 0.1728515625, "learning_rate": 9.216589861751154e-07, "loss": 1.1991, "step": 197 }, { "epoch": 0.9124423963133641, "grad_norm": 0.1640625, "learning_rate": 8.755760368663595e-07, "loss": 1.156, "step": 198 }, { "epoch": 0.9170506912442397, "grad_norm": 0.158203125, "learning_rate": 8.294930875576038e-07, "loss": 1.2011, "step": 199 }, { "epoch": 0.9216589861751152, "grad_norm": 0.1572265625, "learning_rate": 7.834101382488479e-07, "loss": 1.1454, "step": 200 }, { "epoch": 0.9262672811059908, "grad_norm": 0.171875, "learning_rate": 7.373271889400922e-07, "loss": 1.1033, "step": 201 }, { "epoch": 0.9308755760368663, "grad_norm": 0.1484375, "learning_rate": 6.912442396313365e-07, "loss": 1.1421, "step": 202 }, { "epoch": 0.9354838709677419, "grad_norm": 0.1484375, "learning_rate": 6.451612903225807e-07, "loss": 1.1448, "step": 203 }, { "epoch": 0.9400921658986175, "grad_norm": 0.1669921875, "learning_rate": 5.99078341013825e-07, "loss": 1.0805, "step": 204 }, { "epoch": 0.9447004608294931, "grad_norm": 0.146484375, "learning_rate": 5.529953917050692e-07, "loss": 1.0988, "step": 205 }, { "epoch": 0.9493087557603687, "grad_norm": 0.1484375, "learning_rate": 5.069124423963135e-07, "loss": 1.1227, "step": 206 }, { "epoch": 0.9539170506912442, "grad_norm": 0.16796875, "learning_rate": 4.608294930875577e-07, "loss": 1.1434, "step": 207 }, { "epoch": 0.9585253456221198, "grad_norm": 0.166015625, "learning_rate": 4.147465437788019e-07, "loss": 1.1174, "step": 208 }, { "epoch": 0.9631336405529954, "grad_norm": 0.150390625, "learning_rate": 3.686635944700461e-07, "loss": 1.1378, "step": 209 }, { "epoch": 0.967741935483871, "grad_norm": 0.1552734375, "learning_rate": 3.2258064516129035e-07, "loss": 1.1046, "step": 210 }, { "epoch": 0.9723502304147466, "grad_norm": 0.158203125, "learning_rate": 2.764976958525346e-07, "loss": 1.1156, "step": 211 }, { "epoch": 0.9769585253456221, "grad_norm": 0.2197265625, "learning_rate": 2.3041474654377884e-07, "loss": 1.0785, "step": 212 }, { "epoch": 0.9815668202764977, "grad_norm": 0.1611328125, "learning_rate": 1.8433179723502305e-07, "loss": 1.1603, "step": 213 }, { "epoch": 0.9861751152073732, "grad_norm": 0.1630859375, "learning_rate": 1.382488479262673e-07, "loss": 1.1627, "step": 214 }, { "epoch": 0.9907834101382489, "grad_norm": 0.1572265625, "learning_rate": 9.216589861751152e-08, "loss": 1.1749, "step": 215 }, { "epoch": 0.9953917050691244, "grad_norm": 0.158203125, "learning_rate": 4.608294930875576e-08, "loss": 1.1352, "step": 216 }, { "epoch": 1.0, "grad_norm": 0.150390625, "learning_rate": 0.0, "loss": 1.176, "step": 217 } ], "logging_steps": 1.0, "max_steps": 217, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.833535671169188e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }